{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 12527, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.9827572443522e-05, "grad_norm": 1.8585955195028683, "learning_rate": 5.319148936170213e-08, "loss": 4.2119, "step": 1 }, { "epoch": 0.000159655144887044, "grad_norm": 1.8818527224732502, "learning_rate": 1.0638297872340426e-07, "loss": 4.3216, "step": 2 }, { "epoch": 0.00023948271733056597, "grad_norm": 1.9033843889661506, "learning_rate": 1.5957446808510638e-07, "loss": 4.2331, "step": 3 }, { "epoch": 0.000319310289774088, "grad_norm": 1.7527693443406147, "learning_rate": 2.1276595744680852e-07, "loss": 4.1398, "step": 4 }, { "epoch": 0.00039913786221760996, "grad_norm": 1.893307152379322, "learning_rate": 2.6595744680851066e-07, "loss": 4.2229, "step": 5 }, { "epoch": 0.00047896543466113194, "grad_norm": 1.9059171310779845, "learning_rate": 3.1914893617021275e-07, "loss": 4.2203, "step": 6 }, { "epoch": 0.0005587930071046539, "grad_norm": 1.9107548608010763, "learning_rate": 3.723404255319149e-07, "loss": 4.2105, "step": 7 }, { "epoch": 0.000638620579548176, "grad_norm": 1.7666878868933364, "learning_rate": 4.2553191489361704e-07, "loss": 4.1956, "step": 8 }, { "epoch": 0.000718448151991698, "grad_norm": 1.8217697605119803, "learning_rate": 4.787234042553192e-07, "loss": 4.1875, "step": 9 }, { "epoch": 0.0007982757244352199, "grad_norm": 1.83538895137744, "learning_rate": 5.319148936170213e-07, "loss": 4.1461, "step": 10 }, { "epoch": 0.0008781032968787419, "grad_norm": 2.0638518357399045, "learning_rate": 5.851063829787235e-07, "loss": 4.3144, "step": 11 }, { "epoch": 0.0009579308693222639, "grad_norm": 1.936089510045413, "learning_rate": 6.382978723404255e-07, "loss": 4.2199, "step": 12 }, { "epoch": 0.0010377584417657858, "grad_norm": 1.8862195347887736, "learning_rate": 6.914893617021278e-07, "loss": 4.1903, "step": 13 }, { "epoch": 0.0011175860142093078, "grad_norm": 1.9414342090661474, "learning_rate": 7.446808510638298e-07, "loss": 4.2204, "step": 14 }, { "epoch": 0.0011974135866528299, "grad_norm": 1.9035088297379537, "learning_rate": 7.97872340425532e-07, "loss": 4.207, "step": 15 }, { "epoch": 0.001277241159096352, "grad_norm": 1.9464729549579582, "learning_rate": 8.510638297872341e-07, "loss": 4.2285, "step": 16 }, { "epoch": 0.001357068731539874, "grad_norm": 2.0018717869775964, "learning_rate": 9.042553191489363e-07, "loss": 4.2025, "step": 17 }, { "epoch": 0.001436896303983396, "grad_norm": 1.8344847041653434, "learning_rate": 9.574468085106384e-07, "loss": 4.1225, "step": 18 }, { "epoch": 0.0015167238764269178, "grad_norm": 1.9080061953344183, "learning_rate": 1.0106382978723404e-06, "loss": 4.0918, "step": 19 }, { "epoch": 0.0015965514488704398, "grad_norm": 2.0179000336081283, "learning_rate": 1.0638297872340427e-06, "loss": 4.3013, "step": 20 }, { "epoch": 0.0016763790213139619, "grad_norm": 1.9510948428868475, "learning_rate": 1.1170212765957447e-06, "loss": 4.2387, "step": 21 }, { "epoch": 0.0017562065937574839, "grad_norm": 2.0487328615663736, "learning_rate": 1.170212765957447e-06, "loss": 4.2494, "step": 22 }, { "epoch": 0.001836034166201006, "grad_norm": 1.7695373749982408, "learning_rate": 1.223404255319149e-06, "loss": 4.1928, "step": 23 }, { "epoch": 0.0019158617386445277, "grad_norm": 1.9622075402872372, "learning_rate": 1.276595744680851e-06, "loss": 4.2728, "step": 24 }, { "epoch": 0.00199568931108805, "grad_norm": 2.0618436731847782, "learning_rate": 1.3297872340425533e-06, "loss": 4.2437, "step": 25 }, { "epoch": 0.0020755168835315716, "grad_norm": 1.7674197515595793, "learning_rate": 1.3829787234042555e-06, "loss": 4.1852, "step": 26 }, { "epoch": 0.0021553444559750936, "grad_norm": 1.75548481106544, "learning_rate": 1.4361702127659578e-06, "loss": 4.0987, "step": 27 }, { "epoch": 0.0022351720284186157, "grad_norm": 1.9943784666059814, "learning_rate": 1.4893617021276596e-06, "loss": 4.3874, "step": 28 }, { "epoch": 0.0023149996008621377, "grad_norm": 2.0298682513389035, "learning_rate": 1.5425531914893618e-06, "loss": 4.2223, "step": 29 }, { "epoch": 0.0023948271733056597, "grad_norm": 1.9823823796189513, "learning_rate": 1.595744680851064e-06, "loss": 4.1291, "step": 30 }, { "epoch": 0.0024746547457491818, "grad_norm": 1.8974533731532153, "learning_rate": 1.648936170212766e-06, "loss": 4.1012, "step": 31 }, { "epoch": 0.002554482318192704, "grad_norm": 2.0801629106910804, "learning_rate": 1.7021276595744682e-06, "loss": 4.232, "step": 32 }, { "epoch": 0.002634309890636226, "grad_norm": 2.145663851419142, "learning_rate": 1.7553191489361704e-06, "loss": 4.2755, "step": 33 }, { "epoch": 0.002714137463079748, "grad_norm": 2.3104231914544986, "learning_rate": 1.8085106382978727e-06, "loss": 4.2647, "step": 34 }, { "epoch": 0.00279396503552327, "grad_norm": 2.1250148152742154, "learning_rate": 1.8617021276595745e-06, "loss": 4.2092, "step": 35 }, { "epoch": 0.002873792607966792, "grad_norm": 2.085891182514869, "learning_rate": 1.9148936170212767e-06, "loss": 4.1505, "step": 36 }, { "epoch": 0.0029536201804103135, "grad_norm": 2.162686456843843, "learning_rate": 1.968085106382979e-06, "loss": 4.0594, "step": 37 }, { "epoch": 0.0030334477528538356, "grad_norm": 2.231127325580298, "learning_rate": 2.021276595744681e-06, "loss": 4.216, "step": 38 }, { "epoch": 0.0031132753252973576, "grad_norm": 2.291138373553168, "learning_rate": 2.074468085106383e-06, "loss": 4.3143, "step": 39 }, { "epoch": 0.0031931028977408796, "grad_norm": 2.2973664547304007, "learning_rate": 2.1276595744680853e-06, "loss": 4.2205, "step": 40 }, { "epoch": 0.0032729304701844017, "grad_norm": 2.372183544918573, "learning_rate": 2.1808510638297876e-06, "loss": 4.2388, "step": 41 }, { "epoch": 0.0033527580426279237, "grad_norm": 2.328594207574548, "learning_rate": 2.2340425531914894e-06, "loss": 4.1978, "step": 42 }, { "epoch": 0.0034325856150714457, "grad_norm": 2.040839097187458, "learning_rate": 2.2872340425531916e-06, "loss": 3.981, "step": 43 }, { "epoch": 0.0035124131875149678, "grad_norm": 2.2524683168946673, "learning_rate": 2.340425531914894e-06, "loss": 4.1199, "step": 44 }, { "epoch": 0.00359224075995849, "grad_norm": 2.3013124266917027, "learning_rate": 2.393617021276596e-06, "loss": 4.1943, "step": 45 }, { "epoch": 0.003672068332402012, "grad_norm": 2.248871067919234, "learning_rate": 2.446808510638298e-06, "loss": 4.0844, "step": 46 }, { "epoch": 0.0037518959048455335, "grad_norm": 2.232997788229633, "learning_rate": 2.5e-06, "loss": 4.1137, "step": 47 }, { "epoch": 0.0038317234772890555, "grad_norm": 2.112557446889099, "learning_rate": 2.553191489361702e-06, "loss": 4.0497, "step": 48 }, { "epoch": 0.0039115510497325775, "grad_norm": 2.1813170948999594, "learning_rate": 2.6063829787234047e-06, "loss": 4.0343, "step": 49 }, { "epoch": 0.0039913786221761, "grad_norm": 2.2655637352133025, "learning_rate": 2.6595744680851065e-06, "loss": 4.1009, "step": 50 }, { "epoch": 0.004071206194619622, "grad_norm": 2.0915854339145783, "learning_rate": 2.7127659574468084e-06, "loss": 3.9247, "step": 51 }, { "epoch": 0.004151033767063143, "grad_norm": 2.428027286314903, "learning_rate": 2.765957446808511e-06, "loss": 4.1186, "step": 52 }, { "epoch": 0.004230861339506666, "grad_norm": 2.3536079026987755, "learning_rate": 2.819148936170213e-06, "loss": 3.9889, "step": 53 }, { "epoch": 0.004310688911950187, "grad_norm": 2.3946459422400768, "learning_rate": 2.8723404255319155e-06, "loss": 3.9836, "step": 54 }, { "epoch": 0.00439051648439371, "grad_norm": 2.2334473965544372, "learning_rate": 2.9255319148936174e-06, "loss": 3.9995, "step": 55 }, { "epoch": 0.004470344056837231, "grad_norm": 2.156980119755618, "learning_rate": 2.978723404255319e-06, "loss": 3.9457, "step": 56 }, { "epoch": 0.004550171629280754, "grad_norm": 2.071433372266182, "learning_rate": 3.031914893617022e-06, "loss": 3.946, "step": 57 }, { "epoch": 0.004629999201724275, "grad_norm": 2.0383073938941103, "learning_rate": 3.0851063829787237e-06, "loss": 3.7904, "step": 58 }, { "epoch": 0.004709826774167798, "grad_norm": 2.1983126276024594, "learning_rate": 3.1382978723404255e-06, "loss": 3.9168, "step": 59 }, { "epoch": 0.0047896543466113195, "grad_norm": 2.104553539888862, "learning_rate": 3.191489361702128e-06, "loss": 3.7828, "step": 60 }, { "epoch": 0.004869481919054842, "grad_norm": 2.0823227336201415, "learning_rate": 3.24468085106383e-06, "loss": 3.8325, "step": 61 }, { "epoch": 0.0049493094914983635, "grad_norm": 2.0929161630933435, "learning_rate": 3.297872340425532e-06, "loss": 3.8039, "step": 62 }, { "epoch": 0.005029137063941885, "grad_norm": 2.092557896342344, "learning_rate": 3.3510638297872345e-06, "loss": 3.8346, "step": 63 }, { "epoch": 0.005108964636385408, "grad_norm": 1.78867512467737, "learning_rate": 3.4042553191489363e-06, "loss": 3.6865, "step": 64 }, { "epoch": 0.005188792208828929, "grad_norm": 2.3562581381966186, "learning_rate": 3.457446808510639e-06, "loss": 3.8353, "step": 65 }, { "epoch": 0.005268619781272452, "grad_norm": 1.8855924255216, "learning_rate": 3.510638297872341e-06, "loss": 3.6275, "step": 66 }, { "epoch": 0.005348447353715973, "grad_norm": 1.884541055338765, "learning_rate": 3.5638297872340426e-06, "loss": 3.639, "step": 67 }, { "epoch": 0.005428274926159496, "grad_norm": 2.0583552108664134, "learning_rate": 3.6170212765957453e-06, "loss": 3.7053, "step": 68 }, { "epoch": 0.005508102498603017, "grad_norm": 1.860182260690366, "learning_rate": 3.670212765957447e-06, "loss": 3.6254, "step": 69 }, { "epoch": 0.00558793007104654, "grad_norm": 1.8626171791657198, "learning_rate": 3.723404255319149e-06, "loss": 3.6687, "step": 70 }, { "epoch": 0.005667757643490061, "grad_norm": 1.7264764934594066, "learning_rate": 3.7765957446808516e-06, "loss": 3.5189, "step": 71 }, { "epoch": 0.005747585215933584, "grad_norm": 1.7227991908267064, "learning_rate": 3.8297872340425535e-06, "loss": 3.5687, "step": 72 }, { "epoch": 0.0058274127883771055, "grad_norm": 1.6238508747163407, "learning_rate": 3.882978723404256e-06, "loss": 3.4917, "step": 73 }, { "epoch": 0.005907240360820627, "grad_norm": 1.6190097665282972, "learning_rate": 3.936170212765958e-06, "loss": 3.4724, "step": 74 }, { "epoch": 0.0059870679332641496, "grad_norm": 1.6011426737488266, "learning_rate": 3.98936170212766e-06, "loss": 3.4846, "step": 75 }, { "epoch": 0.006066895505707671, "grad_norm": 1.571812763281431, "learning_rate": 4.042553191489362e-06, "loss": 3.4311, "step": 76 }, { "epoch": 0.006146723078151194, "grad_norm": 1.5174355798231711, "learning_rate": 4.095744680851064e-06, "loss": 3.445, "step": 77 }, { "epoch": 0.006226550650594715, "grad_norm": 1.4044726211471903, "learning_rate": 4.148936170212766e-06, "loss": 3.3422, "step": 78 }, { "epoch": 0.006306378223038238, "grad_norm": 1.4136324094950958, "learning_rate": 4.202127659574468e-06, "loss": 3.3582, "step": 79 }, { "epoch": 0.006386205795481759, "grad_norm": 1.382559014622941, "learning_rate": 4.255319148936171e-06, "loss": 3.3155, "step": 80 }, { "epoch": 0.006466033367925282, "grad_norm": 1.4363245372293252, "learning_rate": 4.308510638297873e-06, "loss": 3.3575, "step": 81 }, { "epoch": 0.006545860940368803, "grad_norm": 1.4040192172574961, "learning_rate": 4.361702127659575e-06, "loss": 3.3183, "step": 82 }, { "epoch": 0.006625688512812325, "grad_norm": 1.2464312402104474, "learning_rate": 4.414893617021277e-06, "loss": 3.2234, "step": 83 }, { "epoch": 0.006705516085255847, "grad_norm": 1.1958938050614567, "learning_rate": 4.468085106382979e-06, "loss": 3.1456, "step": 84 }, { "epoch": 0.006785343657699369, "grad_norm": 1.3138756311314883, "learning_rate": 4.521276595744681e-06, "loss": 3.2192, "step": 85 }, { "epoch": 0.0068651712301428915, "grad_norm": 1.2040610125388842, "learning_rate": 4.574468085106383e-06, "loss": 3.1371, "step": 86 }, { "epoch": 0.006944998802586413, "grad_norm": 1.291959735594222, "learning_rate": 4.6276595744680855e-06, "loss": 3.2005, "step": 87 }, { "epoch": 0.0070248263750299356, "grad_norm": 1.1172010445422527, "learning_rate": 4.680851063829788e-06, "loss": 3.1091, "step": 88 }, { "epoch": 0.007104653947473457, "grad_norm": 1.2485953849824325, "learning_rate": 4.73404255319149e-06, "loss": 3.1808, "step": 89 }, { "epoch": 0.00718448151991698, "grad_norm": 1.1709461201936575, "learning_rate": 4.787234042553192e-06, "loss": 3.1197, "step": 90 }, { "epoch": 0.007264309092360501, "grad_norm": 1.1427107562668657, "learning_rate": 4.840425531914894e-06, "loss": 3.0438, "step": 91 }, { "epoch": 0.007344136664804024, "grad_norm": 1.0636232392652234, "learning_rate": 4.893617021276596e-06, "loss": 2.9938, "step": 92 }, { "epoch": 0.007423964237247545, "grad_norm": 1.0813310390877064, "learning_rate": 4.946808510638298e-06, "loss": 2.9896, "step": 93 }, { "epoch": 0.007503791809691067, "grad_norm": 1.106208511276582, "learning_rate": 5e-06, "loss": 3.0307, "step": 94 }, { "epoch": 0.007583619382134589, "grad_norm": 1.1318732098144926, "learning_rate": 5.053191489361703e-06, "loss": 3.0567, "step": 95 }, { "epoch": 0.007663446954578111, "grad_norm": 1.0466290522605828, "learning_rate": 5.106382978723404e-06, "loss": 2.9521, "step": 96 }, { "epoch": 0.0077432745270216334, "grad_norm": 1.0325818889675618, "learning_rate": 5.159574468085107e-06, "loss": 2.9109, "step": 97 }, { "epoch": 0.007823102099465155, "grad_norm": 0.9546765084375874, "learning_rate": 5.212765957446809e-06, "loss": 2.8915, "step": 98 }, { "epoch": 0.007902929671908678, "grad_norm": 0.9776262223160028, "learning_rate": 5.265957446808511e-06, "loss": 2.9066, "step": 99 }, { "epoch": 0.0079827572443522, "grad_norm": 0.9829406835275062, "learning_rate": 5.319148936170213e-06, "loss": 2.8784, "step": 100 }, { "epoch": 0.00806258481679572, "grad_norm": 0.9915865961659707, "learning_rate": 5.372340425531915e-06, "loss": 2.876, "step": 101 }, { "epoch": 0.008142412389239243, "grad_norm": 0.9107621062164554, "learning_rate": 5.425531914893617e-06, "loss": 2.7832, "step": 102 }, { "epoch": 0.008222239961682766, "grad_norm": 0.935616239710402, "learning_rate": 5.47872340425532e-06, "loss": 2.7918, "step": 103 }, { "epoch": 0.008302067534126286, "grad_norm": 0.9134447054229488, "learning_rate": 5.531914893617022e-06, "loss": 2.7797, "step": 104 }, { "epoch": 0.008381895106569809, "grad_norm": 0.8958749465280538, "learning_rate": 5.5851063829787235e-06, "loss": 2.6685, "step": 105 }, { "epoch": 0.008461722679013331, "grad_norm": 0.8983893208829145, "learning_rate": 5.638297872340426e-06, "loss": 2.7405, "step": 106 }, { "epoch": 0.008541550251456854, "grad_norm": 0.874964429041674, "learning_rate": 5.691489361702128e-06, "loss": 2.7235, "step": 107 }, { "epoch": 0.008621377823900375, "grad_norm": 0.9316683157593606, "learning_rate": 5.744680851063831e-06, "loss": 2.6891, "step": 108 }, { "epoch": 0.008701205396343897, "grad_norm": 0.8866448891068709, "learning_rate": 5.7978723404255325e-06, "loss": 2.6227, "step": 109 }, { "epoch": 0.00878103296878742, "grad_norm": 0.8618995270209394, "learning_rate": 5.851063829787235e-06, "loss": 2.6392, "step": 110 }, { "epoch": 0.008860860541230942, "grad_norm": 0.827996366978005, "learning_rate": 5.904255319148937e-06, "loss": 2.5418, "step": 111 }, { "epoch": 0.008940688113674463, "grad_norm": 0.8458165532767541, "learning_rate": 5.957446808510638e-06, "loss": 2.6372, "step": 112 }, { "epoch": 0.009020515686117985, "grad_norm": 0.8485870972909303, "learning_rate": 6.010638297872341e-06, "loss": 2.5761, "step": 113 }, { "epoch": 0.009100343258561508, "grad_norm": 0.8570450147112189, "learning_rate": 6.063829787234044e-06, "loss": 2.5233, "step": 114 }, { "epoch": 0.009180170831005028, "grad_norm": 0.8119939558994558, "learning_rate": 6.117021276595745e-06, "loss": 2.4895, "step": 115 }, { "epoch": 0.00925999840344855, "grad_norm": 0.8800853718559486, "learning_rate": 6.170212765957447e-06, "loss": 2.5042, "step": 116 }, { "epoch": 0.009339825975892073, "grad_norm": 0.8306869843382875, "learning_rate": 6.22340425531915e-06, "loss": 2.4311, "step": 117 }, { "epoch": 0.009419653548335596, "grad_norm": 0.8381456322259108, "learning_rate": 6.276595744680851e-06, "loss": 2.4503, "step": 118 }, { "epoch": 0.009499481120779116, "grad_norm": 0.8456798075813081, "learning_rate": 6.329787234042554e-06, "loss": 2.3914, "step": 119 }, { "epoch": 0.009579308693222639, "grad_norm": 0.8410525119303979, "learning_rate": 6.382978723404256e-06, "loss": 2.3493, "step": 120 }, { "epoch": 0.009659136265666161, "grad_norm": 0.8450931566424794, "learning_rate": 6.436170212765958e-06, "loss": 2.3426, "step": 121 }, { "epoch": 0.009738963838109684, "grad_norm": 0.8650565276997951, "learning_rate": 6.48936170212766e-06, "loss": 2.3288, "step": 122 }, { "epoch": 0.009818791410553205, "grad_norm": 0.8212079034132097, "learning_rate": 6.542553191489362e-06, "loss": 2.3596, "step": 123 }, { "epoch": 0.009898618982996727, "grad_norm": 0.8486799859658392, "learning_rate": 6.595744680851064e-06, "loss": 2.2602, "step": 124 }, { "epoch": 0.00997844655544025, "grad_norm": 0.8700994714391141, "learning_rate": 6.648936170212767e-06, "loss": 2.2589, "step": 125 }, { "epoch": 0.01005827412788377, "grad_norm": 0.8441420191305773, "learning_rate": 6.702127659574469e-06, "loss": 2.2248, "step": 126 }, { "epoch": 0.010138101700327293, "grad_norm": 0.8446022162780242, "learning_rate": 6.75531914893617e-06, "loss": 2.1838, "step": 127 }, { "epoch": 0.010217929272770815, "grad_norm": 0.8351127361555855, "learning_rate": 6.808510638297873e-06, "loss": 2.1104, "step": 128 }, { "epoch": 0.010297756845214338, "grad_norm": 0.8721674344300163, "learning_rate": 6.861702127659575e-06, "loss": 2.1505, "step": 129 }, { "epoch": 0.010377584417657858, "grad_norm": 0.9859368402927429, "learning_rate": 6.914893617021278e-06, "loss": 2.0975, "step": 130 }, { "epoch": 0.010457411990101381, "grad_norm": 1.0248982911025308, "learning_rate": 6.968085106382979e-06, "loss": 2.0245, "step": 131 }, { "epoch": 0.010537239562544903, "grad_norm": 1.0903424034589226, "learning_rate": 7.021276595744682e-06, "loss": 1.9467, "step": 132 }, { "epoch": 0.010617067134988426, "grad_norm": 0.9829834769646966, "learning_rate": 7.074468085106384e-06, "loss": 2.0054, "step": 133 }, { "epoch": 0.010696894707431947, "grad_norm": 0.8956237015833615, "learning_rate": 7.127659574468085e-06, "loss": 2.0352, "step": 134 }, { "epoch": 0.010776722279875469, "grad_norm": 0.82842219853682, "learning_rate": 7.1808510638297875e-06, "loss": 1.8857, "step": 135 }, { "epoch": 0.010856549852318991, "grad_norm": 0.8032369514135603, "learning_rate": 7.234042553191491e-06, "loss": 1.8889, "step": 136 }, { "epoch": 0.010936377424762512, "grad_norm": 0.8259643548151324, "learning_rate": 7.287234042553192e-06, "loss": 1.802, "step": 137 }, { "epoch": 0.011016204997206035, "grad_norm": 0.8676925534388947, "learning_rate": 7.340425531914894e-06, "loss": 1.8524, "step": 138 }, { "epoch": 0.011096032569649557, "grad_norm": 0.8854912466893621, "learning_rate": 7.3936170212765965e-06, "loss": 1.8573, "step": 139 }, { "epoch": 0.01117586014209308, "grad_norm": 0.8778370381934579, "learning_rate": 7.446808510638298e-06, "loss": 1.7434, "step": 140 }, { "epoch": 0.0112556877145366, "grad_norm": 0.8558381970986911, "learning_rate": 7.500000000000001e-06, "loss": 1.7187, "step": 141 }, { "epoch": 0.011335515286980123, "grad_norm": 0.7899759344025535, "learning_rate": 7.553191489361703e-06, "loss": 1.6876, "step": 142 }, { "epoch": 0.011415342859423645, "grad_norm": 0.7614092940674073, "learning_rate": 7.606382978723405e-06, "loss": 1.6529, "step": 143 }, { "epoch": 0.011495170431867168, "grad_norm": 0.7726036066199841, "learning_rate": 7.659574468085107e-06, "loss": 1.6128, "step": 144 }, { "epoch": 0.011574998004310688, "grad_norm": 0.8260094118980289, "learning_rate": 7.71276595744681e-06, "loss": 1.6281, "step": 145 }, { "epoch": 0.011654825576754211, "grad_norm": 0.8654176947345584, "learning_rate": 7.765957446808511e-06, "loss": 1.56, "step": 146 }, { "epoch": 0.011734653149197733, "grad_norm": 0.740135414445827, "learning_rate": 7.819148936170213e-06, "loss": 1.6002, "step": 147 }, { "epoch": 0.011814480721641254, "grad_norm": 0.6584335814876117, "learning_rate": 7.872340425531916e-06, "loss": 1.6145, "step": 148 }, { "epoch": 0.011894308294084777, "grad_norm": 0.6092586326280498, "learning_rate": 7.925531914893617e-06, "loss": 1.5385, "step": 149 }, { "epoch": 0.011974135866528299, "grad_norm": 0.5996436693776025, "learning_rate": 7.97872340425532e-06, "loss": 1.5764, "step": 150 }, { "epoch": 0.012053963438971822, "grad_norm": 0.5423456589994583, "learning_rate": 8.031914893617022e-06, "loss": 1.4241, "step": 151 }, { "epoch": 0.012133791011415342, "grad_norm": 0.5275630569981479, "learning_rate": 8.085106382978723e-06, "loss": 1.4672, "step": 152 }, { "epoch": 0.012213618583858865, "grad_norm": 0.5440556258334405, "learning_rate": 8.138297872340426e-06, "loss": 1.3719, "step": 153 }, { "epoch": 0.012293446156302387, "grad_norm": 0.5376071775481229, "learning_rate": 8.191489361702128e-06, "loss": 1.491, "step": 154 }, { "epoch": 0.012373273728745908, "grad_norm": 0.537656715131316, "learning_rate": 8.24468085106383e-06, "loss": 1.4162, "step": 155 }, { "epoch": 0.01245310130118943, "grad_norm": 0.540613766713406, "learning_rate": 8.297872340425532e-06, "loss": 1.4368, "step": 156 }, { "epoch": 0.012532928873632953, "grad_norm": 0.5225165050625163, "learning_rate": 8.351063829787235e-06, "loss": 1.3277, "step": 157 }, { "epoch": 0.012612756446076475, "grad_norm": 0.5033523705189225, "learning_rate": 8.404255319148937e-06, "loss": 1.3524, "step": 158 }, { "epoch": 0.012692584018519996, "grad_norm": 0.5237033351225415, "learning_rate": 8.457446808510638e-06, "loss": 1.3689, "step": 159 }, { "epoch": 0.012772411590963519, "grad_norm": 0.5146131974169392, "learning_rate": 8.510638297872341e-06, "loss": 1.2731, "step": 160 }, { "epoch": 0.012852239163407041, "grad_norm": 0.5030506889768592, "learning_rate": 8.563829787234044e-06, "loss": 1.2945, "step": 161 }, { "epoch": 0.012932066735850564, "grad_norm": 0.5179652718688802, "learning_rate": 8.617021276595746e-06, "loss": 1.3071, "step": 162 }, { "epoch": 0.013011894308294084, "grad_norm": 0.5346617772906882, "learning_rate": 8.670212765957447e-06, "loss": 1.3239, "step": 163 }, { "epoch": 0.013091721880737607, "grad_norm": 0.5174871392968272, "learning_rate": 8.72340425531915e-06, "loss": 1.3392, "step": 164 }, { "epoch": 0.01317154945318113, "grad_norm": 0.5165393549297441, "learning_rate": 8.776595744680852e-06, "loss": 1.3172, "step": 165 }, { "epoch": 0.01325137702562465, "grad_norm": 0.5117008044748248, "learning_rate": 8.829787234042555e-06, "loss": 1.247, "step": 166 }, { "epoch": 0.013331204598068172, "grad_norm": 0.5525878871419657, "learning_rate": 8.882978723404256e-06, "loss": 1.1929, "step": 167 }, { "epoch": 0.013411032170511695, "grad_norm": 0.5202350950786993, "learning_rate": 8.936170212765958e-06, "loss": 1.187, "step": 168 }, { "epoch": 0.013490859742955217, "grad_norm": 0.5263561328395504, "learning_rate": 8.98936170212766e-06, "loss": 1.1678, "step": 169 }, { "epoch": 0.013570687315398738, "grad_norm": 0.5573972779826423, "learning_rate": 9.042553191489362e-06, "loss": 1.2504, "step": 170 }, { "epoch": 0.01365051488784226, "grad_norm": 0.5286430486515088, "learning_rate": 9.095744680851063e-06, "loss": 1.1774, "step": 171 }, { "epoch": 0.013730342460285783, "grad_norm": 0.5182904570488054, "learning_rate": 9.148936170212767e-06, "loss": 1.1133, "step": 172 }, { "epoch": 0.013810170032729305, "grad_norm": 0.5254985652680076, "learning_rate": 9.20212765957447e-06, "loss": 1.1764, "step": 173 }, { "epoch": 0.013889997605172826, "grad_norm": 0.535823633132722, "learning_rate": 9.255319148936171e-06, "loss": 1.1385, "step": 174 }, { "epoch": 0.013969825177616349, "grad_norm": 0.49630052155568427, "learning_rate": 9.308510638297872e-06, "loss": 1.0711, "step": 175 }, { "epoch": 0.014049652750059871, "grad_norm": 0.469471719584633, "learning_rate": 9.361702127659576e-06, "loss": 1.1293, "step": 176 }, { "epoch": 0.014129480322503392, "grad_norm": 0.43715826441115563, "learning_rate": 9.414893617021279e-06, "loss": 1.1236, "step": 177 }, { "epoch": 0.014209307894946914, "grad_norm": 0.4607769615005427, "learning_rate": 9.46808510638298e-06, "loss": 1.0963, "step": 178 }, { "epoch": 0.014289135467390437, "grad_norm": 0.47377656373848653, "learning_rate": 9.521276595744681e-06, "loss": 1.1053, "step": 179 }, { "epoch": 0.01436896303983396, "grad_norm": 0.4580518638024032, "learning_rate": 9.574468085106385e-06, "loss": 1.1548, "step": 180 }, { "epoch": 0.01444879061227748, "grad_norm": 0.48271149644886735, "learning_rate": 9.627659574468086e-06, "loss": 1.0693, "step": 181 }, { "epoch": 0.014528618184721002, "grad_norm": 0.45981456045624836, "learning_rate": 9.680851063829787e-06, "loss": 1.0427, "step": 182 }, { "epoch": 0.014608445757164525, "grad_norm": 0.47919130522354403, "learning_rate": 9.73404255319149e-06, "loss": 1.0802, "step": 183 }, { "epoch": 0.014688273329608047, "grad_norm": 0.4284279060199463, "learning_rate": 9.787234042553192e-06, "loss": 1.0223, "step": 184 }, { "epoch": 0.014768100902051568, "grad_norm": 0.42445843400646804, "learning_rate": 9.840425531914895e-06, "loss": 1.0424, "step": 185 }, { "epoch": 0.01484792847449509, "grad_norm": 0.39888714906623407, "learning_rate": 9.893617021276596e-06, "loss": 1.0251, "step": 186 }, { "epoch": 0.014927756046938613, "grad_norm": 0.4310049321120679, "learning_rate": 9.946808510638298e-06, "loss": 1.1145, "step": 187 }, { "epoch": 0.015007583619382134, "grad_norm": 0.4350606820918159, "learning_rate": 1e-05, "loss": 1.0504, "step": 188 }, { "epoch": 0.015087411191825656, "grad_norm": 0.38846402298538124, "learning_rate": 1.0053191489361702e-05, "loss": 0.9874, "step": 189 }, { "epoch": 0.015167238764269179, "grad_norm": 0.4079175430945517, "learning_rate": 1.0106382978723405e-05, "loss": 0.9789, "step": 190 }, { "epoch": 0.015247066336712701, "grad_norm": 0.41961692803496015, "learning_rate": 1.0159574468085107e-05, "loss": 1.0221, "step": 191 }, { "epoch": 0.015326893909156222, "grad_norm": 0.4074495785739181, "learning_rate": 1.0212765957446808e-05, "loss": 0.9743, "step": 192 }, { "epoch": 0.015406721481599744, "grad_norm": 0.4112318052938174, "learning_rate": 1.0265957446808513e-05, "loss": 0.9535, "step": 193 }, { "epoch": 0.015486549054043267, "grad_norm": 0.39638059949001425, "learning_rate": 1.0319148936170214e-05, "loss": 0.9971, "step": 194 }, { "epoch": 0.01556637662648679, "grad_norm": 0.3996009462625815, "learning_rate": 1.0372340425531916e-05, "loss": 1.0009, "step": 195 }, { "epoch": 0.01564620419893031, "grad_norm": 0.39792898362078777, "learning_rate": 1.0425531914893619e-05, "loss": 0.9785, "step": 196 }, { "epoch": 0.01572603177137383, "grad_norm": 0.40035767866369837, "learning_rate": 1.047872340425532e-05, "loss": 0.967, "step": 197 }, { "epoch": 0.015805859343817355, "grad_norm": 0.4030561826291099, "learning_rate": 1.0531914893617022e-05, "loss": 0.9614, "step": 198 }, { "epoch": 0.015885686916260876, "grad_norm": 0.42997204825823476, "learning_rate": 1.0585106382978725e-05, "loss": 1.0146, "step": 199 }, { "epoch": 0.0159655144887044, "grad_norm": 0.4251083452992647, "learning_rate": 1.0638297872340426e-05, "loss": 0.9304, "step": 200 }, { "epoch": 0.01604534206114792, "grad_norm": 0.439454770764412, "learning_rate": 1.0691489361702128e-05, "loss": 0.9488, "step": 201 }, { "epoch": 0.01612516963359144, "grad_norm": 0.3974818956546718, "learning_rate": 1.074468085106383e-05, "loss": 0.9525, "step": 202 }, { "epoch": 0.016204997206034966, "grad_norm": 0.4112582356942403, "learning_rate": 1.0797872340425532e-05, "loss": 0.8533, "step": 203 }, { "epoch": 0.016284824778478486, "grad_norm": 0.4286790681099995, "learning_rate": 1.0851063829787233e-05, "loss": 0.9393, "step": 204 }, { "epoch": 0.016364652350922007, "grad_norm": 0.41509435592418514, "learning_rate": 1.0904255319148938e-05, "loss": 0.9272, "step": 205 }, { "epoch": 0.01644447992336553, "grad_norm": 0.38279000623356496, "learning_rate": 1.095744680851064e-05, "loss": 0.9304, "step": 206 }, { "epoch": 0.016524307495809052, "grad_norm": 0.3739481489531005, "learning_rate": 1.1010638297872341e-05, "loss": 0.9063, "step": 207 }, { "epoch": 0.016604135068252573, "grad_norm": 0.4059646579633119, "learning_rate": 1.1063829787234044e-05, "loss": 0.8971, "step": 208 }, { "epoch": 0.016683962640696097, "grad_norm": 0.42035789588680883, "learning_rate": 1.1117021276595746e-05, "loss": 0.9224, "step": 209 }, { "epoch": 0.016763790213139618, "grad_norm": 0.40626840566245787, "learning_rate": 1.1170212765957447e-05, "loss": 0.9222, "step": 210 }, { "epoch": 0.016843617785583142, "grad_norm": 0.4031520990083406, "learning_rate": 1.122340425531915e-05, "loss": 0.9653, "step": 211 }, { "epoch": 0.016923445358026663, "grad_norm": 0.3768361641653907, "learning_rate": 1.1276595744680851e-05, "loss": 0.8657, "step": 212 }, { "epoch": 0.017003272930470183, "grad_norm": 0.41724203761139655, "learning_rate": 1.1329787234042555e-05, "loss": 0.8737, "step": 213 }, { "epoch": 0.017083100502913708, "grad_norm": 0.3717075382781602, "learning_rate": 1.1382978723404256e-05, "loss": 0.8032, "step": 214 }, { "epoch": 0.01716292807535723, "grad_norm": 0.38712211297182963, "learning_rate": 1.1436170212765957e-05, "loss": 0.9516, "step": 215 }, { "epoch": 0.01724275564780075, "grad_norm": 0.3900758912243753, "learning_rate": 1.1489361702127662e-05, "loss": 0.8659, "step": 216 }, { "epoch": 0.017322583220244273, "grad_norm": 0.3814809350733449, "learning_rate": 1.1542553191489364e-05, "loss": 0.8532, "step": 217 }, { "epoch": 0.017402410792687794, "grad_norm": 0.3884260315132834, "learning_rate": 1.1595744680851065e-05, "loss": 0.9233, "step": 218 }, { "epoch": 0.017482238365131315, "grad_norm": 0.3655659584823818, "learning_rate": 1.1648936170212768e-05, "loss": 0.8172, "step": 219 }, { "epoch": 0.01756206593757484, "grad_norm": 0.3740127728998221, "learning_rate": 1.170212765957447e-05, "loss": 0.8656, "step": 220 }, { "epoch": 0.01764189351001836, "grad_norm": 0.3750795308123184, "learning_rate": 1.175531914893617e-05, "loss": 0.9511, "step": 221 }, { "epoch": 0.017721721082461884, "grad_norm": 0.4126064214973273, "learning_rate": 1.1808510638297874e-05, "loss": 0.9427, "step": 222 }, { "epoch": 0.017801548654905405, "grad_norm": 0.37801537877004593, "learning_rate": 1.1861702127659575e-05, "loss": 0.7819, "step": 223 }, { "epoch": 0.017881376227348925, "grad_norm": 0.385299030939099, "learning_rate": 1.1914893617021277e-05, "loss": 0.8619, "step": 224 }, { "epoch": 0.01796120379979245, "grad_norm": 0.360480269576549, "learning_rate": 1.196808510638298e-05, "loss": 0.8256, "step": 225 }, { "epoch": 0.01804103137223597, "grad_norm": 0.3865588116611529, "learning_rate": 1.2021276595744681e-05, "loss": 0.8356, "step": 226 }, { "epoch": 0.01812085894467949, "grad_norm": 0.41537458905585245, "learning_rate": 1.2074468085106383e-05, "loss": 0.913, "step": 227 }, { "epoch": 0.018200686517123015, "grad_norm": 0.38634286389426054, "learning_rate": 1.2127659574468087e-05, "loss": 0.8411, "step": 228 }, { "epoch": 0.018280514089566536, "grad_norm": 0.3454666233979339, "learning_rate": 1.2180851063829789e-05, "loss": 0.8067, "step": 229 }, { "epoch": 0.018360341662010057, "grad_norm": 0.36837260546841305, "learning_rate": 1.223404255319149e-05, "loss": 0.7613, "step": 230 }, { "epoch": 0.01844016923445358, "grad_norm": 0.40797594943859417, "learning_rate": 1.2287234042553193e-05, "loss": 0.78, "step": 231 }, { "epoch": 0.0185199968068971, "grad_norm": 0.3670852283898651, "learning_rate": 1.2340425531914895e-05, "loss": 0.8604, "step": 232 }, { "epoch": 0.018599824379340626, "grad_norm": 0.3678969592438566, "learning_rate": 1.2393617021276596e-05, "loss": 0.8114, "step": 233 }, { "epoch": 0.018679651951784147, "grad_norm": 0.3655402531431315, "learning_rate": 1.24468085106383e-05, "loss": 0.7978, "step": 234 }, { "epoch": 0.018759479524227667, "grad_norm": 0.3664963208190591, "learning_rate": 1.25e-05, "loss": 0.7065, "step": 235 }, { "epoch": 0.01883930709667119, "grad_norm": 0.40028917531014757, "learning_rate": 1.2553191489361702e-05, "loss": 0.873, "step": 236 }, { "epoch": 0.018919134669114712, "grad_norm": 0.35971532481343726, "learning_rate": 1.2606382978723407e-05, "loss": 0.7693, "step": 237 }, { "epoch": 0.018998962241558233, "grad_norm": 0.37061817484398346, "learning_rate": 1.2659574468085108e-05, "loss": 0.8274, "step": 238 }, { "epoch": 0.019078789814001757, "grad_norm": 0.3775722114273337, "learning_rate": 1.2712765957446808e-05, "loss": 0.7369, "step": 239 }, { "epoch": 0.019158617386445278, "grad_norm": 0.3781962060420915, "learning_rate": 1.2765957446808513e-05, "loss": 0.791, "step": 240 }, { "epoch": 0.0192384449588888, "grad_norm": 0.3649986498867162, "learning_rate": 1.2819148936170214e-05, "loss": 0.7628, "step": 241 }, { "epoch": 0.019318272531332323, "grad_norm": 0.39283527695281056, "learning_rate": 1.2872340425531915e-05, "loss": 0.7731, "step": 242 }, { "epoch": 0.019398100103775844, "grad_norm": 0.35952956463535257, "learning_rate": 1.2925531914893619e-05, "loss": 0.8188, "step": 243 }, { "epoch": 0.019477927676219368, "grad_norm": 0.3823982760272184, "learning_rate": 1.297872340425532e-05, "loss": 0.8678, "step": 244 }, { "epoch": 0.01955775524866289, "grad_norm": 0.3520777296640576, "learning_rate": 1.3031914893617021e-05, "loss": 0.7367, "step": 245 }, { "epoch": 0.01963758282110641, "grad_norm": 0.37974706941333575, "learning_rate": 1.3085106382978724e-05, "loss": 0.7706, "step": 246 }, { "epoch": 0.019717410393549933, "grad_norm": 0.3603998523087756, "learning_rate": 1.3138297872340426e-05, "loss": 0.7537, "step": 247 }, { "epoch": 0.019797237965993454, "grad_norm": 0.35666163660414735, "learning_rate": 1.3191489361702127e-05, "loss": 0.7912, "step": 248 }, { "epoch": 0.019877065538436975, "grad_norm": 0.40015980490055875, "learning_rate": 1.3244680851063832e-05, "loss": 0.7154, "step": 249 }, { "epoch": 0.0199568931108805, "grad_norm": 0.37058381398253837, "learning_rate": 1.3297872340425533e-05, "loss": 0.7489, "step": 250 }, { "epoch": 0.02003672068332402, "grad_norm": 0.3906618105045944, "learning_rate": 1.3351063829787235e-05, "loss": 0.8135, "step": 251 }, { "epoch": 0.02011654825576754, "grad_norm": 0.3771141996010153, "learning_rate": 1.3404255319148938e-05, "loss": 0.7364, "step": 252 }, { "epoch": 0.020196375828211065, "grad_norm": 0.37082685559084233, "learning_rate": 1.345744680851064e-05, "loss": 0.749, "step": 253 }, { "epoch": 0.020276203400654585, "grad_norm": 0.3831595564295841, "learning_rate": 1.351063829787234e-05, "loss": 0.7339, "step": 254 }, { "epoch": 0.02035603097309811, "grad_norm": 0.3621451113201384, "learning_rate": 1.3563829787234044e-05, "loss": 0.7763, "step": 255 }, { "epoch": 0.02043585854554163, "grad_norm": 0.36373232216527934, "learning_rate": 1.3617021276595745e-05, "loss": 0.7561, "step": 256 }, { "epoch": 0.02051568611798515, "grad_norm": 0.3812142511403156, "learning_rate": 1.3670212765957447e-05, "loss": 0.7481, "step": 257 }, { "epoch": 0.020595513690428675, "grad_norm": 0.3656256353127966, "learning_rate": 1.372340425531915e-05, "loss": 0.705, "step": 258 }, { "epoch": 0.020675341262872196, "grad_norm": 0.35485280102316813, "learning_rate": 1.3776595744680851e-05, "loss": 0.7369, "step": 259 }, { "epoch": 0.020755168835315717, "grad_norm": 0.3587675646680144, "learning_rate": 1.3829787234042556e-05, "loss": 0.6845, "step": 260 }, { "epoch": 0.02083499640775924, "grad_norm": 0.37344428126130946, "learning_rate": 1.3882978723404257e-05, "loss": 0.7292, "step": 261 }, { "epoch": 0.020914823980202762, "grad_norm": 0.3614153939632489, "learning_rate": 1.3936170212765959e-05, "loss": 0.8274, "step": 262 }, { "epoch": 0.020994651552646282, "grad_norm": 0.35730972189291105, "learning_rate": 1.3989361702127662e-05, "loss": 0.7359, "step": 263 }, { "epoch": 0.021074479125089807, "grad_norm": 0.37420035942800817, "learning_rate": 1.4042553191489363e-05, "loss": 0.7285, "step": 264 }, { "epoch": 0.021154306697533327, "grad_norm": 0.35624690717575436, "learning_rate": 1.4095744680851065e-05, "loss": 0.6996, "step": 265 }, { "epoch": 0.02123413426997685, "grad_norm": 0.3798299299982057, "learning_rate": 1.4148936170212768e-05, "loss": 0.6361, "step": 266 }, { "epoch": 0.021313961842420372, "grad_norm": 0.3448543699958519, "learning_rate": 1.420212765957447e-05, "loss": 0.7481, "step": 267 }, { "epoch": 0.021393789414863893, "grad_norm": 0.36029060323462414, "learning_rate": 1.425531914893617e-05, "loss": 0.6807, "step": 268 }, { "epoch": 0.021473616987307417, "grad_norm": 0.3360656884432279, "learning_rate": 1.4308510638297874e-05, "loss": 0.6309, "step": 269 }, { "epoch": 0.021553444559750938, "grad_norm": 0.34787657017659473, "learning_rate": 1.4361702127659575e-05, "loss": 0.7009, "step": 270 }, { "epoch": 0.02163327213219446, "grad_norm": 0.3140840883338713, "learning_rate": 1.4414893617021276e-05, "loss": 0.714, "step": 271 }, { "epoch": 0.021713099704637983, "grad_norm": 0.35098455007891816, "learning_rate": 1.4468085106382981e-05, "loss": 0.7224, "step": 272 }, { "epoch": 0.021792927277081504, "grad_norm": 0.32737158963358054, "learning_rate": 1.4521276595744683e-05, "loss": 0.6723, "step": 273 }, { "epoch": 0.021872754849525024, "grad_norm": 0.3265546837018743, "learning_rate": 1.4574468085106384e-05, "loss": 0.7197, "step": 274 }, { "epoch": 0.02195258242196855, "grad_norm": 0.34536144173235833, "learning_rate": 1.4627659574468087e-05, "loss": 0.6684, "step": 275 }, { "epoch": 0.02203240999441207, "grad_norm": 0.29810879936285156, "learning_rate": 1.4680851063829789e-05, "loss": 0.6188, "step": 276 }, { "epoch": 0.022112237566855594, "grad_norm": 0.30674292007627363, "learning_rate": 1.473404255319149e-05, "loss": 0.7023, "step": 277 }, { "epoch": 0.022192065139299114, "grad_norm": 0.3047744591409263, "learning_rate": 1.4787234042553193e-05, "loss": 0.6229, "step": 278 }, { "epoch": 0.022271892711742635, "grad_norm": 0.29196483488228375, "learning_rate": 1.4840425531914894e-05, "loss": 0.6691, "step": 279 }, { "epoch": 0.02235172028418616, "grad_norm": 0.3733953329094375, "learning_rate": 1.4893617021276596e-05, "loss": 0.7502, "step": 280 }, { "epoch": 0.02243154785662968, "grad_norm": 0.2727129893723137, "learning_rate": 1.49468085106383e-05, "loss": 0.6562, "step": 281 }, { "epoch": 0.0225113754290732, "grad_norm": 0.2880992141851244, "learning_rate": 1.5000000000000002e-05, "loss": 0.6407, "step": 282 }, { "epoch": 0.022591203001516725, "grad_norm": 0.28872243205771314, "learning_rate": 1.5053191489361702e-05, "loss": 0.674, "step": 283 }, { "epoch": 0.022671030573960246, "grad_norm": 0.3059941804485524, "learning_rate": 1.5106382978723407e-05, "loss": 0.6734, "step": 284 }, { "epoch": 0.022750858146403766, "grad_norm": 0.3318871474794567, "learning_rate": 1.5159574468085108e-05, "loss": 0.6827, "step": 285 }, { "epoch": 0.02283068571884729, "grad_norm": 0.29362196894636916, "learning_rate": 1.521276595744681e-05, "loss": 0.6344, "step": 286 }, { "epoch": 0.02291051329129081, "grad_norm": 0.27723634271388614, "learning_rate": 1.5265957446808512e-05, "loss": 0.6265, "step": 287 }, { "epoch": 0.022990340863734336, "grad_norm": 0.3023086045304901, "learning_rate": 1.5319148936170214e-05, "loss": 0.682, "step": 288 }, { "epoch": 0.023070168436177856, "grad_norm": 0.3408797873073136, "learning_rate": 1.5372340425531915e-05, "loss": 0.719, "step": 289 }, { "epoch": 0.023149996008621377, "grad_norm": 0.3079566517206762, "learning_rate": 1.542553191489362e-05, "loss": 0.746, "step": 290 }, { "epoch": 0.0232298235810649, "grad_norm": 0.28957045123597025, "learning_rate": 1.547872340425532e-05, "loss": 0.6068, "step": 291 }, { "epoch": 0.023309651153508422, "grad_norm": 0.31896830486662486, "learning_rate": 1.5531914893617023e-05, "loss": 0.6683, "step": 292 }, { "epoch": 0.023389478725951943, "grad_norm": 0.2796522508623155, "learning_rate": 1.5585106382978724e-05, "loss": 0.6422, "step": 293 }, { "epoch": 0.023469306298395467, "grad_norm": 0.292295496384891, "learning_rate": 1.5638297872340426e-05, "loss": 0.6769, "step": 294 }, { "epoch": 0.023549133870838988, "grad_norm": 0.360799855596052, "learning_rate": 1.5691489361702127e-05, "loss": 0.6691, "step": 295 }, { "epoch": 0.02362896144328251, "grad_norm": 0.23482077819815253, "learning_rate": 1.5744680851063832e-05, "loss": 0.5871, "step": 296 }, { "epoch": 0.023708789015726033, "grad_norm": 0.34363519970687256, "learning_rate": 1.5797872340425533e-05, "loss": 0.595, "step": 297 }, { "epoch": 0.023788616588169553, "grad_norm": 0.25366963152458366, "learning_rate": 1.5851063829787235e-05, "loss": 0.6119, "step": 298 }, { "epoch": 0.023868444160613077, "grad_norm": 0.2608809865063338, "learning_rate": 1.590425531914894e-05, "loss": 0.6031, "step": 299 }, { "epoch": 0.023948271733056598, "grad_norm": 0.2893794543428356, "learning_rate": 1.595744680851064e-05, "loss": 0.5713, "step": 300 }, { "epoch": 0.02402809930550012, "grad_norm": 0.2812402407216137, "learning_rate": 1.6010638297872342e-05, "loss": 0.6712, "step": 301 }, { "epoch": 0.024107926877943643, "grad_norm": 0.26609808581483735, "learning_rate": 1.6063829787234044e-05, "loss": 0.6329, "step": 302 }, { "epoch": 0.024187754450387164, "grad_norm": 0.2508996877438266, "learning_rate": 1.6117021276595745e-05, "loss": 0.5509, "step": 303 }, { "epoch": 0.024267582022830685, "grad_norm": 0.2566143077548217, "learning_rate": 1.6170212765957446e-05, "loss": 0.5821, "step": 304 }, { "epoch": 0.02434740959527421, "grad_norm": 0.31061666690036716, "learning_rate": 1.622340425531915e-05, "loss": 0.6584, "step": 305 }, { "epoch": 0.02442723716771773, "grad_norm": 0.3774190178034926, "learning_rate": 1.6276595744680853e-05, "loss": 0.7179, "step": 306 }, { "epoch": 0.02450706474016125, "grad_norm": 0.27673263224829864, "learning_rate": 1.6329787234042554e-05, "loss": 0.6526, "step": 307 }, { "epoch": 0.024586892312604774, "grad_norm": 0.291419286851389, "learning_rate": 1.6382978723404255e-05, "loss": 0.5688, "step": 308 }, { "epoch": 0.024666719885048295, "grad_norm": 0.2904613489572453, "learning_rate": 1.6436170212765957e-05, "loss": 0.6676, "step": 309 }, { "epoch": 0.024746547457491816, "grad_norm": 0.2806125636676599, "learning_rate": 1.648936170212766e-05, "loss": 0.6144, "step": 310 }, { "epoch": 0.02482637502993534, "grad_norm": 0.2640675935567978, "learning_rate": 1.6542553191489363e-05, "loss": 0.5208, "step": 311 }, { "epoch": 0.02490620260237886, "grad_norm": 0.3054839502492467, "learning_rate": 1.6595744680851064e-05, "loss": 0.5689, "step": 312 }, { "epoch": 0.024986030174822385, "grad_norm": 0.2727190975277303, "learning_rate": 1.664893617021277e-05, "loss": 0.6127, "step": 313 }, { "epoch": 0.025065857747265906, "grad_norm": 0.29059123094681993, "learning_rate": 1.670212765957447e-05, "loss": 0.5492, "step": 314 }, { "epoch": 0.025145685319709427, "grad_norm": 0.27281927427016517, "learning_rate": 1.6755319148936172e-05, "loss": 0.5657, "step": 315 }, { "epoch": 0.02522551289215295, "grad_norm": 0.2733592772723512, "learning_rate": 1.6808510638297873e-05, "loss": 0.5478, "step": 316 }, { "epoch": 0.02530534046459647, "grad_norm": 0.24708426631956174, "learning_rate": 1.6861702127659575e-05, "loss": 0.5703, "step": 317 }, { "epoch": 0.025385168037039992, "grad_norm": 0.33234494259196107, "learning_rate": 1.6914893617021276e-05, "loss": 0.5961, "step": 318 }, { "epoch": 0.025464995609483516, "grad_norm": 0.25178121933993103, "learning_rate": 1.696808510638298e-05, "loss": 0.5708, "step": 319 }, { "epoch": 0.025544823181927037, "grad_norm": 0.26280253640011353, "learning_rate": 1.7021276595744682e-05, "loss": 0.6259, "step": 320 }, { "epoch": 0.025624650754370558, "grad_norm": 0.26413655431155036, "learning_rate": 1.7074468085106384e-05, "loss": 0.5412, "step": 321 }, { "epoch": 0.025704478326814082, "grad_norm": 0.26885385853419197, "learning_rate": 1.712765957446809e-05, "loss": 0.6054, "step": 322 }, { "epoch": 0.025784305899257603, "grad_norm": 0.255116813459344, "learning_rate": 1.718085106382979e-05, "loss": 0.5204, "step": 323 }, { "epoch": 0.025864133471701127, "grad_norm": 0.26929301824514246, "learning_rate": 1.723404255319149e-05, "loss": 0.5889, "step": 324 }, { "epoch": 0.025943961044144648, "grad_norm": 0.29650650687254265, "learning_rate": 1.7287234042553193e-05, "loss": 0.5716, "step": 325 }, { "epoch": 0.02602378861658817, "grad_norm": 0.29103259091504163, "learning_rate": 1.7340425531914894e-05, "loss": 0.5532, "step": 326 }, { "epoch": 0.026103616189031693, "grad_norm": 0.3047010788699226, "learning_rate": 1.7393617021276596e-05, "loss": 0.6147, "step": 327 }, { "epoch": 0.026183443761475213, "grad_norm": 0.250579444476874, "learning_rate": 1.74468085106383e-05, "loss": 0.6041, "step": 328 }, { "epoch": 0.026263271333918734, "grad_norm": 0.2457547563153005, "learning_rate": 1.7500000000000002e-05, "loss": 0.5259, "step": 329 }, { "epoch": 0.02634309890636226, "grad_norm": 0.2966602307218245, "learning_rate": 1.7553191489361703e-05, "loss": 0.5751, "step": 330 }, { "epoch": 0.02642292647880578, "grad_norm": 0.34188763576360515, "learning_rate": 1.7606382978723408e-05, "loss": 0.5785, "step": 331 }, { "epoch": 0.0265027540512493, "grad_norm": 0.3319496787838454, "learning_rate": 1.765957446808511e-05, "loss": 0.5984, "step": 332 }, { "epoch": 0.026582581623692824, "grad_norm": 0.29501162094966776, "learning_rate": 1.7712765957446807e-05, "loss": 0.6184, "step": 333 }, { "epoch": 0.026662409196136345, "grad_norm": 0.24487353683677215, "learning_rate": 1.7765957446808512e-05, "loss": 0.6094, "step": 334 }, { "epoch": 0.02674223676857987, "grad_norm": 0.358394224987464, "learning_rate": 1.7819148936170214e-05, "loss": 0.5083, "step": 335 }, { "epoch": 0.02682206434102339, "grad_norm": 0.2750664665052955, "learning_rate": 1.7872340425531915e-05, "loss": 0.4748, "step": 336 }, { "epoch": 0.02690189191346691, "grad_norm": 0.3061913023189731, "learning_rate": 1.792553191489362e-05, "loss": 0.5391, "step": 337 }, { "epoch": 0.026981719485910435, "grad_norm": 0.29244638609494555, "learning_rate": 1.797872340425532e-05, "loss": 0.5526, "step": 338 }, { "epoch": 0.027061547058353955, "grad_norm": 0.2757667705799253, "learning_rate": 1.8031914893617023e-05, "loss": 0.5848, "step": 339 }, { "epoch": 0.027141374630797476, "grad_norm": 0.29396121484863297, "learning_rate": 1.8085106382978724e-05, "loss": 0.5583, "step": 340 }, { "epoch": 0.027221202203241, "grad_norm": 0.30939929279329237, "learning_rate": 1.8138297872340425e-05, "loss": 0.5325, "step": 341 }, { "epoch": 0.02730102977568452, "grad_norm": 0.2833985320473062, "learning_rate": 1.8191489361702127e-05, "loss": 0.4928, "step": 342 }, { "epoch": 0.027380857348128042, "grad_norm": 0.26473161249632887, "learning_rate": 1.824468085106383e-05, "loss": 0.5082, "step": 343 }, { "epoch": 0.027460684920571566, "grad_norm": 0.2913786214742765, "learning_rate": 1.8297872340425533e-05, "loss": 0.5319, "step": 344 }, { "epoch": 0.027540512493015087, "grad_norm": 0.292709439234605, "learning_rate": 1.8351063829787234e-05, "loss": 0.5358, "step": 345 }, { "epoch": 0.02762034006545861, "grad_norm": 0.2997277611784478, "learning_rate": 1.840425531914894e-05, "loss": 0.5554, "step": 346 }, { "epoch": 0.02770016763790213, "grad_norm": 0.3393964574515042, "learning_rate": 1.845744680851064e-05, "loss": 0.5352, "step": 347 }, { "epoch": 0.027779995210345652, "grad_norm": 0.30949908817795235, "learning_rate": 1.8510638297872342e-05, "loss": 0.5305, "step": 348 }, { "epoch": 0.027859822782789177, "grad_norm": 0.2978863546649725, "learning_rate": 1.8563829787234043e-05, "loss": 0.5479, "step": 349 }, { "epoch": 0.027939650355232697, "grad_norm": 0.2673130446676285, "learning_rate": 1.8617021276595745e-05, "loss": 0.5679, "step": 350 }, { "epoch": 0.028019477927676218, "grad_norm": 0.2907354362322766, "learning_rate": 1.8670212765957446e-05, "loss": 0.5356, "step": 351 }, { "epoch": 0.028099305500119742, "grad_norm": 0.3085011379217589, "learning_rate": 1.872340425531915e-05, "loss": 0.5349, "step": 352 }, { "epoch": 0.028179133072563263, "grad_norm": 0.3410989953653494, "learning_rate": 1.8776595744680852e-05, "loss": 0.5403, "step": 353 }, { "epoch": 0.028258960645006784, "grad_norm": 0.34014056160844475, "learning_rate": 1.8829787234042557e-05, "loss": 0.5324, "step": 354 }, { "epoch": 0.028338788217450308, "grad_norm": 0.30936612145401826, "learning_rate": 1.888297872340426e-05, "loss": 0.5507, "step": 355 }, { "epoch": 0.02841861578989383, "grad_norm": 0.2708625592851619, "learning_rate": 1.893617021276596e-05, "loss": 0.4898, "step": 356 }, { "epoch": 0.028498443362337353, "grad_norm": 0.30684612861369187, "learning_rate": 1.898936170212766e-05, "loss": 0.4847, "step": 357 }, { "epoch": 0.028578270934780874, "grad_norm": 0.2600456698105891, "learning_rate": 1.9042553191489363e-05, "loss": 0.5424, "step": 358 }, { "epoch": 0.028658098507224394, "grad_norm": 0.2560323364586408, "learning_rate": 1.9095744680851064e-05, "loss": 0.5198, "step": 359 }, { "epoch": 0.02873792607966792, "grad_norm": 0.2775341203222387, "learning_rate": 1.914893617021277e-05, "loss": 0.5339, "step": 360 }, { "epoch": 0.02881775365211144, "grad_norm": 0.3364567252740908, "learning_rate": 1.920212765957447e-05, "loss": 0.4948, "step": 361 }, { "epoch": 0.02889758122455496, "grad_norm": 0.2750701749689801, "learning_rate": 1.9255319148936172e-05, "loss": 0.531, "step": 362 }, { "epoch": 0.028977408796998484, "grad_norm": 0.30251703944754804, "learning_rate": 1.9308510638297873e-05, "loss": 0.5954, "step": 363 }, { "epoch": 0.029057236369442005, "grad_norm": 0.3066051128463291, "learning_rate": 1.9361702127659575e-05, "loss": 0.636, "step": 364 }, { "epoch": 0.029137063941885526, "grad_norm": 0.29453972106679716, "learning_rate": 1.9414893617021276e-05, "loss": 0.5379, "step": 365 }, { "epoch": 0.02921689151432905, "grad_norm": 0.2754870946819982, "learning_rate": 1.946808510638298e-05, "loss": 0.5024, "step": 366 }, { "epoch": 0.02929671908677257, "grad_norm": 0.26384130601120936, "learning_rate": 1.9521276595744682e-05, "loss": 0.4716, "step": 367 }, { "epoch": 0.029376546659216095, "grad_norm": 0.254982565581993, "learning_rate": 1.9574468085106384e-05, "loss": 0.5747, "step": 368 }, { "epoch": 0.029456374231659616, "grad_norm": 0.2317763251866386, "learning_rate": 1.962765957446809e-05, "loss": 0.5053, "step": 369 }, { "epoch": 0.029536201804103136, "grad_norm": 0.2853338356935848, "learning_rate": 1.968085106382979e-05, "loss": 0.5597, "step": 370 }, { "epoch": 0.02961602937654666, "grad_norm": 0.27010124099038224, "learning_rate": 1.973404255319149e-05, "loss": 0.4876, "step": 371 }, { "epoch": 0.02969585694899018, "grad_norm": 0.2549283867810663, "learning_rate": 1.9787234042553193e-05, "loss": 0.5616, "step": 372 }, { "epoch": 0.029775684521433702, "grad_norm": 0.288168146107891, "learning_rate": 1.9840425531914894e-05, "loss": 0.5175, "step": 373 }, { "epoch": 0.029855512093877226, "grad_norm": 0.2529851005697039, "learning_rate": 1.9893617021276595e-05, "loss": 0.4739, "step": 374 }, { "epoch": 0.029935339666320747, "grad_norm": 0.2464864761727044, "learning_rate": 1.99468085106383e-05, "loss": 0.4878, "step": 375 }, { "epoch": 0.030015167238764268, "grad_norm": 0.27540295928695624, "learning_rate": 2e-05, "loss": 0.4771, "step": 376 }, { "epoch": 0.030094994811207792, "grad_norm": 0.25871874520425514, "learning_rate": 1.999999966576979e-05, "loss": 0.4747, "step": 377 }, { "epoch": 0.030174822383651313, "grad_norm": 0.2862886568314869, "learning_rate": 1.9999998663079173e-05, "loss": 0.5524, "step": 378 }, { "epoch": 0.030254649956094837, "grad_norm": 0.2458188277939762, "learning_rate": 1.999999699192822e-05, "loss": 0.5323, "step": 379 }, { "epoch": 0.030334477528538357, "grad_norm": 0.27710478097009067, "learning_rate": 1.9999994652317042e-05, "loss": 0.4373, "step": 380 }, { "epoch": 0.030414305100981878, "grad_norm": 0.23812096480086342, "learning_rate": 1.9999991644245796e-05, "loss": 0.4431, "step": 381 }, { "epoch": 0.030494132673425402, "grad_norm": 0.2926186860234796, "learning_rate": 1.9999987967714684e-05, "loss": 0.5126, "step": 382 }, { "epoch": 0.030573960245868923, "grad_norm": 0.2855516786839399, "learning_rate": 1.999998362272395e-05, "loss": 0.5117, "step": 383 }, { "epoch": 0.030653787818312444, "grad_norm": 0.2737911359078456, "learning_rate": 1.9999978609273884e-05, "loss": 0.4432, "step": 384 }, { "epoch": 0.030733615390755968, "grad_norm": 0.26134856348924906, "learning_rate": 1.9999972927364825e-05, "loss": 0.4251, "step": 385 }, { "epoch": 0.03081344296319949, "grad_norm": 0.2898633872809489, "learning_rate": 1.9999966576997145e-05, "loss": 0.4341, "step": 386 }, { "epoch": 0.03089327053564301, "grad_norm": 0.2741133369715729, "learning_rate": 1.9999959558171276e-05, "loss": 0.4645, "step": 387 }, { "epoch": 0.030973098108086534, "grad_norm": 0.2898313742460782, "learning_rate": 1.9999951870887683e-05, "loss": 0.485, "step": 388 }, { "epoch": 0.031052925680530055, "grad_norm": 0.23487139943442273, "learning_rate": 1.9999943515146884e-05, "loss": 0.3941, "step": 389 }, { "epoch": 0.03113275325297358, "grad_norm": 0.25929053432887006, "learning_rate": 1.9999934490949435e-05, "loss": 0.5156, "step": 390 }, { "epoch": 0.0312125808254171, "grad_norm": 0.2544725379905895, "learning_rate": 1.999992479829594e-05, "loss": 0.427, "step": 391 }, { "epoch": 0.03129240839786062, "grad_norm": 0.2556339715747649, "learning_rate": 1.9999914437187043e-05, "loss": 0.438, "step": 392 }, { "epoch": 0.03137223597030414, "grad_norm": 0.3168271563548717, "learning_rate": 1.999990340762344e-05, "loss": 0.5231, "step": 393 }, { "epoch": 0.03145206354274766, "grad_norm": 0.2881214442065685, "learning_rate": 1.999989170960587e-05, "loss": 0.4966, "step": 394 }, { "epoch": 0.03153189111519119, "grad_norm": 0.26029718723823286, "learning_rate": 1.999987934313511e-05, "loss": 0.4473, "step": 395 }, { "epoch": 0.03161171868763471, "grad_norm": 0.2702649953132241, "learning_rate": 1.999986630821199e-05, "loss": 0.4742, "step": 396 }, { "epoch": 0.03169154626007823, "grad_norm": 0.31450016331123887, "learning_rate": 1.9999852604837388e-05, "loss": 0.469, "step": 397 }, { "epoch": 0.03177137383252175, "grad_norm": 0.2850048723094952, "learning_rate": 1.9999838233012206e-05, "loss": 0.4293, "step": 398 }, { "epoch": 0.03185120140496527, "grad_norm": 0.2843832850320762, "learning_rate": 1.9999823192737415e-05, "loss": 0.4576, "step": 399 }, { "epoch": 0.0319310289774088, "grad_norm": 0.26155425121663073, "learning_rate": 1.9999807484014017e-05, "loss": 0.4962, "step": 400 }, { "epoch": 0.03201085654985232, "grad_norm": 0.28253632282203184, "learning_rate": 1.9999791106843062e-05, "loss": 0.516, "step": 401 }, { "epoch": 0.03209068412229584, "grad_norm": 0.27352413599509096, "learning_rate": 1.9999774061225642e-05, "loss": 0.4788, "step": 402 }, { "epoch": 0.03217051169473936, "grad_norm": 0.27945423514045975, "learning_rate": 1.99997563471629e-05, "loss": 0.4962, "step": 403 }, { "epoch": 0.03225033926718288, "grad_norm": 0.3110129572990749, "learning_rate": 1.999973796465602e-05, "loss": 0.4722, "step": 404 }, { "epoch": 0.032330166839626404, "grad_norm": 0.3038741685068818, "learning_rate": 1.999971891370623e-05, "loss": 0.4902, "step": 405 }, { "epoch": 0.03240999441206993, "grad_norm": 0.276018307165378, "learning_rate": 1.9999699194314804e-05, "loss": 0.534, "step": 406 }, { "epoch": 0.03248982198451345, "grad_norm": 0.282044543976251, "learning_rate": 1.9999678806483062e-05, "loss": 0.457, "step": 407 }, { "epoch": 0.03256964955695697, "grad_norm": 0.2657103543100286, "learning_rate": 1.9999657750212362e-05, "loss": 0.4162, "step": 408 }, { "epoch": 0.03264947712940049, "grad_norm": 0.2559225176482518, "learning_rate": 1.9999636025504117e-05, "loss": 0.4256, "step": 409 }, { "epoch": 0.032729304701844014, "grad_norm": 0.2711737343053326, "learning_rate": 1.9999613632359775e-05, "loss": 0.4869, "step": 410 }, { "epoch": 0.03280913227428754, "grad_norm": 0.24896925368400552, "learning_rate": 1.9999590570780832e-05, "loss": 0.4108, "step": 411 }, { "epoch": 0.03288895984673106, "grad_norm": 0.3448632505325103, "learning_rate": 1.9999566840768838e-05, "loss": 0.4317, "step": 412 }, { "epoch": 0.03296878741917458, "grad_norm": 0.2660893496005064, "learning_rate": 1.9999542442325368e-05, "loss": 0.4693, "step": 413 }, { "epoch": 0.033048614991618104, "grad_norm": 0.2714212094680256, "learning_rate": 1.999951737545206e-05, "loss": 0.4392, "step": 414 }, { "epoch": 0.033128442564061625, "grad_norm": 0.2499200043580038, "learning_rate": 1.9999491640150587e-05, "loss": 0.5095, "step": 415 }, { "epoch": 0.033208270136505146, "grad_norm": 0.2881961122780396, "learning_rate": 1.999946523642267e-05, "loss": 0.4302, "step": 416 }, { "epoch": 0.03328809770894867, "grad_norm": 0.3398564163394324, "learning_rate": 1.9999438164270074e-05, "loss": 0.5282, "step": 417 }, { "epoch": 0.033367925281392194, "grad_norm": 0.273375998551522, "learning_rate": 1.999941042369461e-05, "loss": 0.4568, "step": 418 }, { "epoch": 0.033447752853835715, "grad_norm": 0.22594467136676583, "learning_rate": 1.999938201469813e-05, "loss": 0.4094, "step": 419 }, { "epoch": 0.033527580426279235, "grad_norm": 0.30146485570333925, "learning_rate": 1.9999352937282535e-05, "loss": 0.4363, "step": 420 }, { "epoch": 0.033607407998722756, "grad_norm": 0.31027321730996404, "learning_rate": 1.9999323191449768e-05, "loss": 0.488, "step": 421 }, { "epoch": 0.033687235571166284, "grad_norm": 0.2545333400896693, "learning_rate": 1.9999292777201815e-05, "loss": 0.4151, "step": 422 }, { "epoch": 0.033767063143609805, "grad_norm": 0.23506450338688437, "learning_rate": 1.999926169454071e-05, "loss": 0.4101, "step": 423 }, { "epoch": 0.033846890716053325, "grad_norm": 0.3018771913765304, "learning_rate": 1.9999229943468535e-05, "loss": 0.4312, "step": 424 }, { "epoch": 0.033926718288496846, "grad_norm": 0.2615423990112727, "learning_rate": 1.999919752398741e-05, "loss": 0.4462, "step": 425 }, { "epoch": 0.03400654586094037, "grad_norm": 0.3074175479288148, "learning_rate": 1.9999164436099502e-05, "loss": 0.4401, "step": 426 }, { "epoch": 0.03408637343338389, "grad_norm": 0.2418473348180298, "learning_rate": 1.999913067980702e-05, "loss": 0.4248, "step": 427 }, { "epoch": 0.034166201005827415, "grad_norm": 0.2530223710965955, "learning_rate": 1.9999096255112224e-05, "loss": 0.4533, "step": 428 }, { "epoch": 0.034246028578270936, "grad_norm": 0.24668640332648356, "learning_rate": 1.9999061162017416e-05, "loss": 0.4638, "step": 429 }, { "epoch": 0.03432585615071446, "grad_norm": 0.2914532636963527, "learning_rate": 1.9999025400524938e-05, "loss": 0.4964, "step": 430 }, { "epoch": 0.03440568372315798, "grad_norm": 0.2961209138033692, "learning_rate": 1.9998988970637182e-05, "loss": 0.4434, "step": 431 }, { "epoch": 0.0344855112956015, "grad_norm": 0.28291538915056846, "learning_rate": 1.999895187235658e-05, "loss": 0.5034, "step": 432 }, { "epoch": 0.034565338868045026, "grad_norm": 0.3038322507586497, "learning_rate": 1.999891410568562e-05, "loss": 0.4617, "step": 433 }, { "epoch": 0.034645166440488546, "grad_norm": 0.28324345284001307, "learning_rate": 1.999887567062682e-05, "loss": 0.4457, "step": 434 }, { "epoch": 0.03472499401293207, "grad_norm": 0.25060278999441093, "learning_rate": 1.9998836567182756e-05, "loss": 0.4388, "step": 435 }, { "epoch": 0.03480482158537559, "grad_norm": 0.2699228861652706, "learning_rate": 1.999879679535603e-05, "loss": 0.4121, "step": 436 }, { "epoch": 0.03488464915781911, "grad_norm": 0.24336025073009374, "learning_rate": 1.9998756355149312e-05, "loss": 0.4417, "step": 437 }, { "epoch": 0.03496447673026263, "grad_norm": 0.25861933507834883, "learning_rate": 1.99987152465653e-05, "loss": 0.4068, "step": 438 }, { "epoch": 0.03504430430270616, "grad_norm": 0.2888626329159579, "learning_rate": 1.9998673469606743e-05, "loss": 0.4479, "step": 439 }, { "epoch": 0.03512413187514968, "grad_norm": 0.2484619804512189, "learning_rate": 1.9998631024276435e-05, "loss": 0.3816, "step": 440 }, { "epoch": 0.0352039594475932, "grad_norm": 0.29247246428106133, "learning_rate": 1.999858791057721e-05, "loss": 0.4554, "step": 441 }, { "epoch": 0.03528378702003672, "grad_norm": 0.2423356996463411, "learning_rate": 1.999854412851195e-05, "loss": 0.4214, "step": 442 }, { "epoch": 0.03536361459248024, "grad_norm": 0.2622856738800754, "learning_rate": 1.9998499678083585e-05, "loss": 0.4373, "step": 443 }, { "epoch": 0.03544344216492377, "grad_norm": 0.2827935232079969, "learning_rate": 1.9998454559295085e-05, "loss": 0.4537, "step": 444 }, { "epoch": 0.03552326973736729, "grad_norm": 0.2518024558254589, "learning_rate": 1.9998408772149468e-05, "loss": 0.3963, "step": 445 }, { "epoch": 0.03560309730981081, "grad_norm": 0.21979159904951467, "learning_rate": 1.999836231664979e-05, "loss": 0.4036, "step": 446 }, { "epoch": 0.03568292488225433, "grad_norm": 0.2474363860971638, "learning_rate": 1.999831519279916e-05, "loss": 0.4282, "step": 447 }, { "epoch": 0.03576275245469785, "grad_norm": 0.3197226253431312, "learning_rate": 1.9998267400600727e-05, "loss": 0.4727, "step": 448 }, { "epoch": 0.03584258002714137, "grad_norm": 0.2429081837030798, "learning_rate": 1.9998218940057684e-05, "loss": 0.4356, "step": 449 }, { "epoch": 0.0359224075995849, "grad_norm": 0.31983377656665685, "learning_rate": 1.999816981117327e-05, "loss": 0.4906, "step": 450 }, { "epoch": 0.03600223517202842, "grad_norm": 0.23747211541688634, "learning_rate": 1.9998120013950774e-05, "loss": 0.4242, "step": 451 }, { "epoch": 0.03608206274447194, "grad_norm": 0.26613709335511043, "learning_rate": 1.9998069548393524e-05, "loss": 0.4408, "step": 452 }, { "epoch": 0.03616189031691546, "grad_norm": 0.20985194528809561, "learning_rate": 1.999801841450489e-05, "loss": 0.3783, "step": 453 }, { "epoch": 0.03624171788935898, "grad_norm": 0.31019482122155634, "learning_rate": 1.999796661228829e-05, "loss": 0.4123, "step": 454 }, { "epoch": 0.03632154546180251, "grad_norm": 0.22256470722420676, "learning_rate": 1.9997914141747188e-05, "loss": 0.4201, "step": 455 }, { "epoch": 0.03640137303424603, "grad_norm": 0.21507610486339596, "learning_rate": 1.9997861002885093e-05, "loss": 0.4149, "step": 456 }, { "epoch": 0.03648120060668955, "grad_norm": 0.22342992498430878, "learning_rate": 1.9997807195705556e-05, "loss": 0.4281, "step": 457 }, { "epoch": 0.03656102817913307, "grad_norm": 0.22897572143035147, "learning_rate": 1.999775272021217e-05, "loss": 0.3748, "step": 458 }, { "epoch": 0.03664085575157659, "grad_norm": 0.23128649859632391, "learning_rate": 1.9997697576408582e-05, "loss": 0.4499, "step": 459 }, { "epoch": 0.03672068332402011, "grad_norm": 0.2312612153310835, "learning_rate": 1.9997641764298478e-05, "loss": 0.4165, "step": 460 }, { "epoch": 0.03680051089646364, "grad_norm": 0.2061228051138228, "learning_rate": 1.9997585283885588e-05, "loss": 0.3444, "step": 461 }, { "epoch": 0.03688033846890716, "grad_norm": 0.20699643616582017, "learning_rate": 1.999752813517368e-05, "loss": 0.3876, "step": 462 }, { "epoch": 0.03696016604135068, "grad_norm": 0.2395645254775981, "learning_rate": 1.999747031816659e-05, "loss": 0.4361, "step": 463 }, { "epoch": 0.0370399936137942, "grad_norm": 0.24800291995659457, "learning_rate": 1.9997411832868164e-05, "loss": 0.4265, "step": 464 }, { "epoch": 0.037119821186237724, "grad_norm": 0.22475266489919227, "learning_rate": 1.9997352679282326e-05, "loss": 0.3835, "step": 465 }, { "epoch": 0.03719964875868125, "grad_norm": 0.20336118950495807, "learning_rate": 1.9997292857413025e-05, "loss": 0.3943, "step": 466 }, { "epoch": 0.03727947633112477, "grad_norm": 0.234378585256553, "learning_rate": 1.999723236726426e-05, "loss": 0.3612, "step": 467 }, { "epoch": 0.03735930390356829, "grad_norm": 0.19355463367857034, "learning_rate": 1.9997171208840072e-05, "loss": 0.3971, "step": 468 }, { "epoch": 0.037439131476011814, "grad_norm": 0.2052619178159623, "learning_rate": 1.9997109382144555e-05, "loss": 0.4152, "step": 469 }, { "epoch": 0.037518959048455335, "grad_norm": 0.2109803323911807, "learning_rate": 1.9997046887181836e-05, "loss": 0.3768, "step": 470 }, { "epoch": 0.037598786620898855, "grad_norm": 0.20866663279135575, "learning_rate": 1.9996983723956097e-05, "loss": 0.3851, "step": 471 }, { "epoch": 0.03767861419334238, "grad_norm": 0.20212677014018143, "learning_rate": 1.999691989247156e-05, "loss": 0.3889, "step": 472 }, { "epoch": 0.037758441765785904, "grad_norm": 0.21517106519470913, "learning_rate": 1.9996855392732483e-05, "loss": 0.3629, "step": 473 }, { "epoch": 0.037838269338229424, "grad_norm": 0.22150737298979173, "learning_rate": 1.999679022474319e-05, "loss": 0.4601, "step": 474 }, { "epoch": 0.037918096910672945, "grad_norm": 0.1856994413000417, "learning_rate": 1.9996724388508033e-05, "loss": 0.4045, "step": 475 }, { "epoch": 0.037997924483116466, "grad_norm": 0.2126167520555856, "learning_rate": 1.999665788403141e-05, "loss": 0.3714, "step": 476 }, { "epoch": 0.038077752055559994, "grad_norm": 0.19134737776355104, "learning_rate": 1.9996590711317768e-05, "loss": 0.3861, "step": 477 }, { "epoch": 0.038157579628003514, "grad_norm": 0.18584921175317493, "learning_rate": 1.9996522870371596e-05, "loss": 0.3678, "step": 478 }, { "epoch": 0.038237407200447035, "grad_norm": 0.1816789200010099, "learning_rate": 1.9996454361197436e-05, "loss": 0.3793, "step": 479 }, { "epoch": 0.038317234772890556, "grad_norm": 0.2071286325837549, "learning_rate": 1.9996385183799857e-05, "loss": 0.4041, "step": 480 }, { "epoch": 0.038397062345334076, "grad_norm": 0.21942125280058006, "learning_rate": 1.9996315338183493e-05, "loss": 0.4255, "step": 481 }, { "epoch": 0.0384768899177776, "grad_norm": 0.17835777325416521, "learning_rate": 1.9996244824353007e-05, "loss": 0.3788, "step": 482 }, { "epoch": 0.038556717490221125, "grad_norm": 0.19472300270935738, "learning_rate": 1.9996173642313113e-05, "loss": 0.3848, "step": 483 }, { "epoch": 0.038636545062664646, "grad_norm": 0.20984406552705173, "learning_rate": 1.999610179206857e-05, "loss": 0.3511, "step": 484 }, { "epoch": 0.038716372635108166, "grad_norm": 0.18032736910122774, "learning_rate": 1.999602927362418e-05, "loss": 0.3951, "step": 485 }, { "epoch": 0.03879620020755169, "grad_norm": 0.20347053855840433, "learning_rate": 1.999595608698479e-05, "loss": 0.383, "step": 486 }, { "epoch": 0.03887602777999521, "grad_norm": 0.20481851770563655, "learning_rate": 1.99958822321553e-05, "loss": 0.3982, "step": 487 }, { "epoch": 0.038955855352438735, "grad_norm": 0.183029594902679, "learning_rate": 1.9995807709140634e-05, "loss": 0.3565, "step": 488 }, { "epoch": 0.039035682924882256, "grad_norm": 0.190491864306789, "learning_rate": 1.9995732517945783e-05, "loss": 0.3966, "step": 489 }, { "epoch": 0.03911551049732578, "grad_norm": 0.2041641624502582, "learning_rate": 1.9995656658575772e-05, "loss": 0.3701, "step": 490 }, { "epoch": 0.0391953380697693, "grad_norm": 0.22499704886115718, "learning_rate": 1.999558013103567e-05, "loss": 0.4093, "step": 491 }, { "epoch": 0.03927516564221282, "grad_norm": 0.1932500608971232, "learning_rate": 1.999550293533059e-05, "loss": 0.4021, "step": 492 }, { "epoch": 0.03935499321465634, "grad_norm": 0.16549038380690478, "learning_rate": 1.9995425071465698e-05, "loss": 0.3927, "step": 493 }, { "epoch": 0.03943482078709987, "grad_norm": 0.17961610153711133, "learning_rate": 1.9995346539446194e-05, "loss": 0.3661, "step": 494 }, { "epoch": 0.03951464835954339, "grad_norm": 0.1825477835002042, "learning_rate": 1.9995267339277334e-05, "loss": 0.3949, "step": 495 }, { "epoch": 0.03959447593198691, "grad_norm": 0.17957379892993106, "learning_rate": 1.9995187470964402e-05, "loss": 0.416, "step": 496 }, { "epoch": 0.03967430350443043, "grad_norm": 0.21410391598369066, "learning_rate": 1.9995106934512746e-05, "loss": 0.3409, "step": 497 }, { "epoch": 0.03975413107687395, "grad_norm": 0.1814038185013964, "learning_rate": 1.9995025729927745e-05, "loss": 0.4093, "step": 498 }, { "epoch": 0.03983395864931748, "grad_norm": 0.1851072364477728, "learning_rate": 1.999494385721483e-05, "loss": 0.3963, "step": 499 }, { "epoch": 0.039913786221761, "grad_norm": 0.15725889899807818, "learning_rate": 1.999486131637947e-05, "loss": 0.3365, "step": 500 }, { "epoch": 0.03999361379420452, "grad_norm": 0.17301349152500625, "learning_rate": 1.999477810742719e-05, "loss": 0.3449, "step": 501 }, { "epoch": 0.04007344136664804, "grad_norm": 0.18488334305232842, "learning_rate": 1.9994694230363543e-05, "loss": 0.3993, "step": 502 }, { "epoch": 0.04015326893909156, "grad_norm": 0.17398017106044947, "learning_rate": 1.999460968519414e-05, "loss": 0.3386, "step": 503 }, { "epoch": 0.04023309651153508, "grad_norm": 0.1630519780638082, "learning_rate": 1.9994524471924636e-05, "loss": 0.3455, "step": 504 }, { "epoch": 0.04031292408397861, "grad_norm": 0.18380850116052733, "learning_rate": 1.9994438590560715e-05, "loss": 0.3814, "step": 505 }, { "epoch": 0.04039275165642213, "grad_norm": 0.17122467809563055, "learning_rate": 1.9994352041108133e-05, "loss": 0.3303, "step": 506 }, { "epoch": 0.04047257922886565, "grad_norm": 0.16388728431066443, "learning_rate": 1.999426482357267e-05, "loss": 0.4061, "step": 507 }, { "epoch": 0.04055240680130917, "grad_norm": 0.1905899647578814, "learning_rate": 1.9994176937960152e-05, "loss": 0.3768, "step": 508 }, { "epoch": 0.04063223437375269, "grad_norm": 0.17769104091135335, "learning_rate": 1.999408838427646e-05, "loss": 0.3849, "step": 509 }, { "epoch": 0.04071206194619622, "grad_norm": 0.17450806758569037, "learning_rate": 1.9993999162527505e-05, "loss": 0.3672, "step": 510 }, { "epoch": 0.04079188951863974, "grad_norm": 0.16825509093259403, "learning_rate": 1.9993909272719262e-05, "loss": 0.3885, "step": 511 }, { "epoch": 0.04087171709108326, "grad_norm": 0.16859940266668114, "learning_rate": 1.9993818714857732e-05, "loss": 0.3524, "step": 512 }, { "epoch": 0.04095154466352678, "grad_norm": 0.17383437869871768, "learning_rate": 1.999372748894897e-05, "loss": 0.3481, "step": 513 }, { "epoch": 0.0410313722359703, "grad_norm": 0.17478130078277895, "learning_rate": 1.9993635594999077e-05, "loss": 0.4072, "step": 514 }, { "epoch": 0.04111119980841382, "grad_norm": 0.15639072973395401, "learning_rate": 1.9993543033014192e-05, "loss": 0.3829, "step": 515 }, { "epoch": 0.04119102738085735, "grad_norm": 0.19090061946394787, "learning_rate": 1.9993449803000503e-05, "loss": 0.3729, "step": 516 }, { "epoch": 0.04127085495330087, "grad_norm": 0.17530614993280838, "learning_rate": 1.9993355904964245e-05, "loss": 0.4118, "step": 517 }, { "epoch": 0.04135068252574439, "grad_norm": 0.173998335282176, "learning_rate": 1.999326133891169e-05, "loss": 0.3897, "step": 518 }, { "epoch": 0.04143051009818791, "grad_norm": 0.17041530406104477, "learning_rate": 1.9993166104849164e-05, "loss": 0.3765, "step": 519 }, { "epoch": 0.041510337670631434, "grad_norm": 0.17686953861256008, "learning_rate": 1.9993070202783033e-05, "loss": 0.3938, "step": 520 }, { "epoch": 0.04159016524307496, "grad_norm": 0.1637646723963023, "learning_rate": 1.9992973632719703e-05, "loss": 0.393, "step": 521 }, { "epoch": 0.04166999281551848, "grad_norm": 0.16265820966899666, "learning_rate": 1.999287639466563e-05, "loss": 0.3826, "step": 522 }, { "epoch": 0.041749820387962, "grad_norm": 0.17409026883869827, "learning_rate": 1.999277848862732e-05, "loss": 0.3339, "step": 523 }, { "epoch": 0.041829647960405524, "grad_norm": 0.16814845943017828, "learning_rate": 1.999267991461131e-05, "loss": 0.3972, "step": 524 }, { "epoch": 0.041909475532849044, "grad_norm": 0.16597843403917475, "learning_rate": 1.9992580672624196e-05, "loss": 0.3817, "step": 525 }, { "epoch": 0.041989303105292565, "grad_norm": 0.1839676828398958, "learning_rate": 1.9992480762672607e-05, "loss": 0.3457, "step": 526 }, { "epoch": 0.04206913067773609, "grad_norm": 0.23822306838410237, "learning_rate": 1.9992380184763226e-05, "loss": 0.3919, "step": 527 }, { "epoch": 0.04214895825017961, "grad_norm": 0.18662154663785954, "learning_rate": 1.9992278938902772e-05, "loss": 0.3368, "step": 528 }, { "epoch": 0.042228785822623134, "grad_norm": 0.19564654794725578, "learning_rate": 1.9992177025098015e-05, "loss": 0.3792, "step": 529 }, { "epoch": 0.042308613395066655, "grad_norm": 0.19831012069564263, "learning_rate": 1.9992074443355768e-05, "loss": 0.3622, "step": 530 }, { "epoch": 0.042388440967510176, "grad_norm": 0.17838862586870677, "learning_rate": 1.9991971193682884e-05, "loss": 0.3617, "step": 531 }, { "epoch": 0.0424682685399537, "grad_norm": 0.17205922872989754, "learning_rate": 1.9991867276086273e-05, "loss": 0.3914, "step": 532 }, { "epoch": 0.042548096112397224, "grad_norm": 0.17630640253711477, "learning_rate": 1.999176269057287e-05, "loss": 0.3879, "step": 533 }, { "epoch": 0.042627923684840745, "grad_norm": 0.17547645207607118, "learning_rate": 1.9991657437149682e-05, "loss": 0.3446, "step": 534 }, { "epoch": 0.042707751257284265, "grad_norm": 0.17765458601529455, "learning_rate": 1.9991551515823728e-05, "loss": 0.3314, "step": 535 }, { "epoch": 0.042787578829727786, "grad_norm": 0.17064131081336656, "learning_rate": 1.99914449266021e-05, "loss": 0.3529, "step": 536 }, { "epoch": 0.04286740640217131, "grad_norm": 0.1660980303851995, "learning_rate": 1.9991337669491915e-05, "loss": 0.3591, "step": 537 }, { "epoch": 0.042947233974614835, "grad_norm": 0.18174151060880206, "learning_rate": 1.999122974450035e-05, "loss": 0.3891, "step": 538 }, { "epoch": 0.043027061547058355, "grad_norm": 0.15830338384972806, "learning_rate": 1.9991121151634615e-05, "loss": 0.3789, "step": 539 }, { "epoch": 0.043106889119501876, "grad_norm": 0.18249959034610214, "learning_rate": 1.9991011890901972e-05, "loss": 0.3328, "step": 540 }, { "epoch": 0.0431867166919454, "grad_norm": 0.15941629479825764, "learning_rate": 1.9990901962309718e-05, "loss": 0.3411, "step": 541 }, { "epoch": 0.04326654426438892, "grad_norm": 0.16607135378140772, "learning_rate": 1.999079136586521e-05, "loss": 0.3383, "step": 542 }, { "epoch": 0.043346371836832445, "grad_norm": 0.1596393018002295, "learning_rate": 1.9990680101575835e-05, "loss": 0.3346, "step": 543 }, { "epoch": 0.043426199409275966, "grad_norm": 0.1678470722730336, "learning_rate": 1.9990568169449033e-05, "loss": 0.3657, "step": 544 }, { "epoch": 0.04350602698171949, "grad_norm": 0.16924510952264718, "learning_rate": 1.999045556949229e-05, "loss": 0.3474, "step": 545 }, { "epoch": 0.04358585455416301, "grad_norm": 0.17174175764783425, "learning_rate": 1.9990342301713125e-05, "loss": 0.3767, "step": 546 }, { "epoch": 0.04366568212660653, "grad_norm": 0.18565553277087837, "learning_rate": 1.9990228366119113e-05, "loss": 0.3838, "step": 547 }, { "epoch": 0.04374550969905005, "grad_norm": 0.1718622460793188, "learning_rate": 1.9990113762717867e-05, "loss": 0.3388, "step": 548 }, { "epoch": 0.04382533727149358, "grad_norm": 0.1702786075099049, "learning_rate": 1.9989998491517056e-05, "loss": 0.3562, "step": 549 }, { "epoch": 0.0439051648439371, "grad_norm": 0.18948896482486022, "learning_rate": 1.9989882552524376e-05, "loss": 0.329, "step": 550 }, { "epoch": 0.04398499241638062, "grad_norm": 0.15160519258736352, "learning_rate": 1.9989765945747587e-05, "loss": 0.3871, "step": 551 }, { "epoch": 0.04406481998882414, "grad_norm": 0.1676957647468343, "learning_rate": 1.9989648671194475e-05, "loss": 0.3227, "step": 552 }, { "epoch": 0.04414464756126766, "grad_norm": 0.17963882712428778, "learning_rate": 1.9989530728872886e-05, "loss": 0.396, "step": 553 }, { "epoch": 0.04422447513371119, "grad_norm": 0.1812082633395112, "learning_rate": 1.9989412118790697e-05, "loss": 0.326, "step": 554 }, { "epoch": 0.04430430270615471, "grad_norm": 0.18881176536537814, "learning_rate": 1.998929284095584e-05, "loss": 0.3765, "step": 555 }, { "epoch": 0.04438413027859823, "grad_norm": 0.19248740972922154, "learning_rate": 1.998917289537629e-05, "loss": 0.3446, "step": 556 }, { "epoch": 0.04446395785104175, "grad_norm": 0.1555273625456736, "learning_rate": 1.9989052282060064e-05, "loss": 0.3618, "step": 557 }, { "epoch": 0.04454378542348527, "grad_norm": 0.17720499660931627, "learning_rate": 1.9988931001015222e-05, "loss": 0.3445, "step": 558 }, { "epoch": 0.04462361299592879, "grad_norm": 0.17353576842140872, "learning_rate": 1.9988809052249873e-05, "loss": 0.3596, "step": 559 }, { "epoch": 0.04470344056837232, "grad_norm": 0.16274689924173094, "learning_rate": 1.998868643577217e-05, "loss": 0.3557, "step": 560 }, { "epoch": 0.04478326814081584, "grad_norm": 0.15983659884138587, "learning_rate": 1.9988563151590308e-05, "loss": 0.3526, "step": 561 }, { "epoch": 0.04486309571325936, "grad_norm": 0.1752394761137251, "learning_rate": 1.9988439199712527e-05, "loss": 0.3964, "step": 562 }, { "epoch": 0.04494292328570288, "grad_norm": 0.16698743385613235, "learning_rate": 1.9988314580147117e-05, "loss": 0.3715, "step": 563 }, { "epoch": 0.0450227508581464, "grad_norm": 0.16009222231226988, "learning_rate": 1.99881892929024e-05, "loss": 0.3464, "step": 564 }, { "epoch": 0.04510257843058993, "grad_norm": 0.17700968486054225, "learning_rate": 1.998806333798676e-05, "loss": 0.3535, "step": 565 }, { "epoch": 0.04518240600303345, "grad_norm": 0.15896864628472354, "learning_rate": 1.9987936715408612e-05, "loss": 0.346, "step": 566 }, { "epoch": 0.04526223357547697, "grad_norm": 0.17352691159221342, "learning_rate": 1.9987809425176424e-05, "loss": 0.4267, "step": 567 }, { "epoch": 0.04534206114792049, "grad_norm": 0.17230148656373984, "learning_rate": 1.99876814672987e-05, "loss": 0.396, "step": 568 }, { "epoch": 0.04542188872036401, "grad_norm": 0.16848930737279677, "learning_rate": 1.9987552841783993e-05, "loss": 0.3606, "step": 569 }, { "epoch": 0.04550171629280753, "grad_norm": 0.17879001860331628, "learning_rate": 1.9987423548640906e-05, "loss": 0.339, "step": 570 }, { "epoch": 0.04558154386525106, "grad_norm": 0.1616554629892271, "learning_rate": 1.9987293587878076e-05, "loss": 0.3789, "step": 571 }, { "epoch": 0.04566137143769458, "grad_norm": 0.1650529385328007, "learning_rate": 1.9987162959504198e-05, "loss": 0.3293, "step": 572 }, { "epoch": 0.0457411990101381, "grad_norm": 0.15088212876132887, "learning_rate": 1.9987031663527998e-05, "loss": 0.3511, "step": 573 }, { "epoch": 0.04582102658258162, "grad_norm": 0.16328372363897597, "learning_rate": 1.9986899699958253e-05, "loss": 0.3452, "step": 574 }, { "epoch": 0.04590085415502514, "grad_norm": 0.17384093836902884, "learning_rate": 1.9986767068803784e-05, "loss": 0.3444, "step": 575 }, { "epoch": 0.04598068172746867, "grad_norm": 0.1822087638655109, "learning_rate": 1.998663377007346e-05, "loss": 0.3748, "step": 576 }, { "epoch": 0.04606050929991219, "grad_norm": 0.15776662467317684, "learning_rate": 1.998649980377619e-05, "loss": 0.3287, "step": 577 }, { "epoch": 0.04614033687235571, "grad_norm": 0.15481585249007818, "learning_rate": 1.998636516992093e-05, "loss": 0.3466, "step": 578 }, { "epoch": 0.04622016444479923, "grad_norm": 0.20761994404552223, "learning_rate": 1.998622986851668e-05, "loss": 0.3597, "step": 579 }, { "epoch": 0.046299992017242754, "grad_norm": 0.16164645460593885, "learning_rate": 1.998609389957248e-05, "loss": 0.4051, "step": 580 }, { "epoch": 0.046379819589686275, "grad_norm": 0.15875553869744038, "learning_rate": 1.9985957263097423e-05, "loss": 0.3643, "step": 581 }, { "epoch": 0.0464596471621298, "grad_norm": 0.16342756221614405, "learning_rate": 1.998581995910064e-05, "loss": 0.3663, "step": 582 }, { "epoch": 0.04653947473457332, "grad_norm": 0.16531895374210212, "learning_rate": 1.9985681987591314e-05, "loss": 0.3172, "step": 583 }, { "epoch": 0.046619302307016844, "grad_norm": 0.17539903416969235, "learning_rate": 1.9985543348578664e-05, "loss": 0.3533, "step": 584 }, { "epoch": 0.046699129879460365, "grad_norm": 0.17092831727994168, "learning_rate": 1.9985404042071955e-05, "loss": 0.3165, "step": 585 }, { "epoch": 0.046778957451903885, "grad_norm": 0.1623765707436511, "learning_rate": 1.9985264068080505e-05, "loss": 0.3654, "step": 586 }, { "epoch": 0.04685878502434741, "grad_norm": 0.18241848436019864, "learning_rate": 1.998512342661367e-05, "loss": 0.3566, "step": 587 }, { "epoch": 0.046938612596790934, "grad_norm": 0.16179151002492648, "learning_rate": 1.9984982117680845e-05, "loss": 0.4178, "step": 588 }, { "epoch": 0.047018440169234454, "grad_norm": 0.1673106887686587, "learning_rate": 1.998484014129148e-05, "loss": 0.3301, "step": 589 }, { "epoch": 0.047098267741677975, "grad_norm": 0.1485982864086722, "learning_rate": 1.9984697497455068e-05, "loss": 0.3212, "step": 590 }, { "epoch": 0.047178095314121496, "grad_norm": 0.18716269792069468, "learning_rate": 1.9984554186181144e-05, "loss": 0.3497, "step": 591 }, { "epoch": 0.04725792288656502, "grad_norm": 0.17084543728226534, "learning_rate": 1.9984410207479282e-05, "loss": 0.3758, "step": 592 }, { "epoch": 0.047337750459008544, "grad_norm": 0.16941534501585195, "learning_rate": 1.998426556135911e-05, "loss": 0.337, "step": 593 }, { "epoch": 0.047417578031452065, "grad_norm": 0.1596302023699249, "learning_rate": 1.9984120247830302e-05, "loss": 0.3587, "step": 594 }, { "epoch": 0.047497405603895586, "grad_norm": 0.15943131243534547, "learning_rate": 1.998397426690256e-05, "loss": 0.3475, "step": 595 }, { "epoch": 0.04757723317633911, "grad_norm": 0.17365008591929001, "learning_rate": 1.9983827618585655e-05, "loss": 0.4179, "step": 596 }, { "epoch": 0.04765706074878263, "grad_norm": 0.17820302663845194, "learning_rate": 1.9983680302889382e-05, "loss": 0.3444, "step": 597 }, { "epoch": 0.047736888321226155, "grad_norm": 0.16364113968063332, "learning_rate": 1.9983532319823588e-05, "loss": 0.2912, "step": 598 }, { "epoch": 0.047816715893669676, "grad_norm": 0.16062601630224918, "learning_rate": 1.998338366939817e-05, "loss": 0.2942, "step": 599 }, { "epoch": 0.047896543466113196, "grad_norm": 0.1541933764775175, "learning_rate": 1.9983234351623063e-05, "loss": 0.3433, "step": 600 }, { "epoch": 0.04797637103855672, "grad_norm": 0.17013691269506617, "learning_rate": 1.9983084366508244e-05, "loss": 0.3742, "step": 601 }, { "epoch": 0.04805619861100024, "grad_norm": 0.1569706847062842, "learning_rate": 1.9982933714063745e-05, "loss": 0.3228, "step": 602 }, { "epoch": 0.04813602618344376, "grad_norm": 0.17799720825638424, "learning_rate": 1.9982782394299633e-05, "loss": 0.3895, "step": 603 }, { "epoch": 0.048215853755887286, "grad_norm": 0.1715584148946107, "learning_rate": 1.9982630407226024e-05, "loss": 0.3282, "step": 604 }, { "epoch": 0.04829568132833081, "grad_norm": 0.14932378468294794, "learning_rate": 1.998247775285308e-05, "loss": 0.3564, "step": 605 }, { "epoch": 0.04837550890077433, "grad_norm": 0.1696127475532547, "learning_rate": 1.9982324431191e-05, "loss": 0.374, "step": 606 }, { "epoch": 0.04845533647321785, "grad_norm": 0.17197520619865403, "learning_rate": 1.9982170442250038e-05, "loss": 0.3338, "step": 607 }, { "epoch": 0.04853516404566137, "grad_norm": 0.20270664157856766, "learning_rate": 1.9982015786040484e-05, "loss": 0.3495, "step": 608 }, { "epoch": 0.0486149916181049, "grad_norm": 0.15962706902848667, "learning_rate": 1.9981860462572678e-05, "loss": 0.3505, "step": 609 }, { "epoch": 0.04869481919054842, "grad_norm": 0.18529713394368277, "learning_rate": 1.9981704471857003e-05, "loss": 0.3951, "step": 610 }, { "epoch": 0.04877464676299194, "grad_norm": 0.16795495161438412, "learning_rate": 1.9981547813903886e-05, "loss": 0.3197, "step": 611 }, { "epoch": 0.04885447433543546, "grad_norm": 0.1717752803905062, "learning_rate": 1.9981390488723798e-05, "loss": 0.3441, "step": 612 }, { "epoch": 0.04893430190787898, "grad_norm": 0.17223772702603507, "learning_rate": 1.998123249632726e-05, "loss": 0.3799, "step": 613 }, { "epoch": 0.0490141294803225, "grad_norm": 0.18072401573799843, "learning_rate": 1.9981073836724824e-05, "loss": 0.3216, "step": 614 }, { "epoch": 0.04909395705276603, "grad_norm": 0.14540640957647924, "learning_rate": 1.9980914509927104e-05, "loss": 0.2994, "step": 615 }, { "epoch": 0.04917378462520955, "grad_norm": 0.1730959562630139, "learning_rate": 1.998075451594475e-05, "loss": 0.3411, "step": 616 }, { "epoch": 0.04925361219765307, "grad_norm": 0.18050771084556688, "learning_rate": 1.9980593854788453e-05, "loss": 0.3476, "step": 617 }, { "epoch": 0.04933343977009659, "grad_norm": 0.17526943541460474, "learning_rate": 1.998043252646895e-05, "loss": 0.388, "step": 618 }, { "epoch": 0.04941326734254011, "grad_norm": 0.17304556038863236, "learning_rate": 1.9980270530997034e-05, "loss": 0.3146, "step": 619 }, { "epoch": 0.04949309491498363, "grad_norm": 0.1683960628100999, "learning_rate": 1.9980107868383532e-05, "loss": 0.3571, "step": 620 }, { "epoch": 0.04957292248742716, "grad_norm": 0.18334168583943025, "learning_rate": 1.997994453863931e-05, "loss": 0.2888, "step": 621 }, { "epoch": 0.04965275005987068, "grad_norm": 0.14942437767597755, "learning_rate": 1.997978054177529e-05, "loss": 0.3722, "step": 622 }, { "epoch": 0.0497325776323142, "grad_norm": 0.15042196058313595, "learning_rate": 1.9979615877802438e-05, "loss": 0.3946, "step": 623 }, { "epoch": 0.04981240520475772, "grad_norm": 0.1614377036457642, "learning_rate": 1.9979450546731755e-05, "loss": 0.3077, "step": 624 }, { "epoch": 0.04989223277720124, "grad_norm": 0.17413624631105354, "learning_rate": 1.9979284548574295e-05, "loss": 0.3459, "step": 625 }, { "epoch": 0.04997206034964477, "grad_norm": 0.1594081927243406, "learning_rate": 1.997911788334116e-05, "loss": 0.3536, "step": 626 }, { "epoch": 0.05005188792208829, "grad_norm": 0.15039648862582394, "learning_rate": 1.997895055104348e-05, "loss": 0.3665, "step": 627 }, { "epoch": 0.05013171549453181, "grad_norm": 0.16928737229945445, "learning_rate": 1.9978782551692454e-05, "loss": 0.3058, "step": 628 }, { "epoch": 0.05021154306697533, "grad_norm": 0.18597198292721012, "learning_rate": 1.99786138852993e-05, "loss": 0.3529, "step": 629 }, { "epoch": 0.05029137063941885, "grad_norm": 0.17468436590294115, "learning_rate": 1.9978444551875296e-05, "loss": 0.2884, "step": 630 }, { "epoch": 0.050371198211862374, "grad_norm": 0.1660700321253158, "learning_rate": 1.9978274551431768e-05, "loss": 0.2971, "step": 631 }, { "epoch": 0.0504510257843059, "grad_norm": 0.1710952559135375, "learning_rate": 1.997810388398007e-05, "loss": 0.3431, "step": 632 }, { "epoch": 0.05053085335674942, "grad_norm": 0.17438692488300947, "learning_rate": 1.9977932549531617e-05, "loss": 0.3972, "step": 633 }, { "epoch": 0.05061068092919294, "grad_norm": 0.16939563225109458, "learning_rate": 1.9977760548097857e-05, "loss": 0.3302, "step": 634 }, { "epoch": 0.050690508501636464, "grad_norm": 0.15619488269310164, "learning_rate": 1.9977587879690293e-05, "loss": 0.332, "step": 635 }, { "epoch": 0.050770336074079984, "grad_norm": 0.15053357665235717, "learning_rate": 1.9977414544320463e-05, "loss": 0.3253, "step": 636 }, { "epoch": 0.05085016364652351, "grad_norm": 0.1651479485075937, "learning_rate": 1.997724054199996e-05, "loss": 0.377, "step": 637 }, { "epoch": 0.05092999121896703, "grad_norm": 0.16669999790134193, "learning_rate": 1.9977065872740407e-05, "loss": 0.342, "step": 638 }, { "epoch": 0.051009818791410554, "grad_norm": 0.1560840472350113, "learning_rate": 1.9976890536553482e-05, "loss": 0.3652, "step": 639 }, { "epoch": 0.051089646363854074, "grad_norm": 0.16284620320866913, "learning_rate": 1.997671453345091e-05, "loss": 0.3624, "step": 640 }, { "epoch": 0.051169473936297595, "grad_norm": 0.16579446697730044, "learning_rate": 1.9976537863444453e-05, "loss": 0.3326, "step": 641 }, { "epoch": 0.051249301508741116, "grad_norm": 0.15357081612362125, "learning_rate": 1.9976360526545924e-05, "loss": 0.3702, "step": 642 }, { "epoch": 0.05132912908118464, "grad_norm": 0.1591103807478018, "learning_rate": 1.997618252276717e-05, "loss": 0.3544, "step": 643 }, { "epoch": 0.051408956653628164, "grad_norm": 0.15861605509617271, "learning_rate": 1.99760038521201e-05, "loss": 0.328, "step": 644 }, { "epoch": 0.051488784226071685, "grad_norm": 0.17073016848301956, "learning_rate": 1.9975824514616648e-05, "loss": 0.348, "step": 645 }, { "epoch": 0.051568611798515206, "grad_norm": 0.1609573822886052, "learning_rate": 1.9975644510268805e-05, "loss": 0.3376, "step": 646 }, { "epoch": 0.051648439370958726, "grad_norm": 0.1690211869303295, "learning_rate": 1.9975463839088606e-05, "loss": 0.3601, "step": 647 }, { "epoch": 0.051728266943402254, "grad_norm": 0.15773491009182206, "learning_rate": 1.9975282501088126e-05, "loss": 0.3446, "step": 648 }, { "epoch": 0.051808094515845775, "grad_norm": 0.15737943251365982, "learning_rate": 1.997510049627949e-05, "loss": 0.3616, "step": 649 }, { "epoch": 0.051887922088289296, "grad_norm": 0.16699160796582713, "learning_rate": 1.9974917824674856e-05, "loss": 0.3576, "step": 650 }, { "epoch": 0.051967749660732816, "grad_norm": 0.16041569830728417, "learning_rate": 1.9974734486286443e-05, "loss": 0.315, "step": 651 }, { "epoch": 0.05204757723317634, "grad_norm": 0.15912316937351279, "learning_rate": 1.9974550481126505e-05, "loss": 0.3329, "step": 652 }, { "epoch": 0.05212740480561986, "grad_norm": 0.17064480759800926, "learning_rate": 1.997436580920734e-05, "loss": 0.3505, "step": 653 }, { "epoch": 0.052207232378063385, "grad_norm": 0.15253545438205332, "learning_rate": 1.997418047054129e-05, "loss": 0.3661, "step": 654 }, { "epoch": 0.052287059950506906, "grad_norm": 0.17019023004342496, "learning_rate": 1.9973994465140753e-05, "loss": 0.3257, "step": 655 }, { "epoch": 0.05236688752295043, "grad_norm": 0.16046289918096532, "learning_rate": 1.9973807793018155e-05, "loss": 0.3146, "step": 656 }, { "epoch": 0.05244671509539395, "grad_norm": 0.16380714155755316, "learning_rate": 1.9973620454185974e-05, "loss": 0.3115, "step": 657 }, { "epoch": 0.05252654266783747, "grad_norm": 0.14975153956805173, "learning_rate": 1.997343244865674e-05, "loss": 0.3323, "step": 658 }, { "epoch": 0.052606370240280996, "grad_norm": 0.15454521200783433, "learning_rate": 1.9973243776443015e-05, "loss": 0.3347, "step": 659 }, { "epoch": 0.05268619781272452, "grad_norm": 0.14814969038725986, "learning_rate": 1.997305443755741e-05, "loss": 0.3253, "step": 660 }, { "epoch": 0.05276602538516804, "grad_norm": 0.17511980740053357, "learning_rate": 1.9972864432012585e-05, "loss": 0.3826, "step": 661 }, { "epoch": 0.05284585295761156, "grad_norm": 0.1677269012416274, "learning_rate": 1.9972673759821238e-05, "loss": 0.3158, "step": 662 }, { "epoch": 0.05292568053005508, "grad_norm": 0.1807055152123051, "learning_rate": 1.9972482420996117e-05, "loss": 0.2994, "step": 663 }, { "epoch": 0.0530055081024986, "grad_norm": 0.19257566428749553, "learning_rate": 1.997229041555001e-05, "loss": 0.3443, "step": 664 }, { "epoch": 0.05308533567494213, "grad_norm": 0.18200288027707306, "learning_rate": 1.9972097743495756e-05, "loss": 0.3077, "step": 665 }, { "epoch": 0.05316516324738565, "grad_norm": 0.1839003893634442, "learning_rate": 1.9971904404846233e-05, "loss": 0.3081, "step": 666 }, { "epoch": 0.05324499081982917, "grad_norm": 0.1675417263805935, "learning_rate": 1.997171039961436e-05, "loss": 0.2725, "step": 667 }, { "epoch": 0.05332481839227269, "grad_norm": 0.1687410824526452, "learning_rate": 1.9971515727813112e-05, "loss": 0.3228, "step": 668 }, { "epoch": 0.05340464596471621, "grad_norm": 0.1547516832644251, "learning_rate": 1.9971320389455494e-05, "loss": 0.335, "step": 669 }, { "epoch": 0.05348447353715974, "grad_norm": 0.14340475286938933, "learning_rate": 1.9971124384554576e-05, "loss": 0.2878, "step": 670 }, { "epoch": 0.05356430110960326, "grad_norm": 0.16266374982151563, "learning_rate": 1.9970927713123448e-05, "loss": 0.2968, "step": 671 }, { "epoch": 0.05364412868204678, "grad_norm": 0.17111329407888948, "learning_rate": 1.997073037517526e-05, "loss": 0.3775, "step": 672 }, { "epoch": 0.0537239562544903, "grad_norm": 0.15094882944604784, "learning_rate": 1.997053237072321e-05, "loss": 0.3617, "step": 673 }, { "epoch": 0.05380378382693382, "grad_norm": 0.17958761931074738, "learning_rate": 1.9970333699780523e-05, "loss": 0.2968, "step": 674 }, { "epoch": 0.05388361139937734, "grad_norm": 0.1601136117366151, "learning_rate": 1.9970134362360488e-05, "loss": 0.267, "step": 675 }, { "epoch": 0.05396343897182087, "grad_norm": 0.16164431527969628, "learning_rate": 1.9969934358476425e-05, "loss": 0.3124, "step": 676 }, { "epoch": 0.05404326654426439, "grad_norm": 0.1753417182448066, "learning_rate": 1.996973368814171e-05, "loss": 0.3304, "step": 677 }, { "epoch": 0.05412309411670791, "grad_norm": 0.19162846151888663, "learning_rate": 1.9969532351369748e-05, "loss": 0.3298, "step": 678 }, { "epoch": 0.05420292168915143, "grad_norm": 0.15408228786178213, "learning_rate": 1.9969330348174e-05, "loss": 0.3361, "step": 679 }, { "epoch": 0.05428274926159495, "grad_norm": 0.17750768248285925, "learning_rate": 1.996912767856798e-05, "loss": 0.3244, "step": 680 }, { "epoch": 0.05436257683403848, "grad_norm": 0.1915628279781806, "learning_rate": 1.996892434256522e-05, "loss": 0.4079, "step": 681 }, { "epoch": 0.054442404406482, "grad_norm": 0.16987648678796563, "learning_rate": 1.996872034017932e-05, "loss": 0.2932, "step": 682 }, { "epoch": 0.05452223197892552, "grad_norm": 0.17015712831604388, "learning_rate": 1.9968515671423913e-05, "loss": 0.2894, "step": 683 }, { "epoch": 0.05460205955136904, "grad_norm": 0.17304773047091357, "learning_rate": 1.9968310336312687e-05, "loss": 0.3238, "step": 684 }, { "epoch": 0.05468188712381256, "grad_norm": 0.18620361122076384, "learning_rate": 1.996810433485936e-05, "loss": 0.3107, "step": 685 }, { "epoch": 0.054761714696256084, "grad_norm": 0.20861798036409518, "learning_rate": 1.9967897667077715e-05, "loss": 0.3385, "step": 686 }, { "epoch": 0.05484154226869961, "grad_norm": 0.1703877010106852, "learning_rate": 1.9967690332981548e-05, "loss": 0.2855, "step": 687 }, { "epoch": 0.05492136984114313, "grad_norm": 0.1890129844797048, "learning_rate": 1.9967482332584734e-05, "loss": 0.3217, "step": 688 }, { "epoch": 0.05500119741358665, "grad_norm": 0.18271007583110724, "learning_rate": 1.996727366590117e-05, "loss": 0.3455, "step": 689 }, { "epoch": 0.05508102498603017, "grad_norm": 0.18248825197525373, "learning_rate": 1.9967064332944805e-05, "loss": 0.3616, "step": 690 }, { "epoch": 0.055160852558473694, "grad_norm": 0.1726319085567641, "learning_rate": 1.9966854333729637e-05, "loss": 0.3312, "step": 691 }, { "epoch": 0.05524068013091722, "grad_norm": 0.17555751041124468, "learning_rate": 1.9966643668269697e-05, "loss": 0.2988, "step": 692 }, { "epoch": 0.05532050770336074, "grad_norm": 0.1744316053162075, "learning_rate": 1.996643233657907e-05, "loss": 0.3533, "step": 693 }, { "epoch": 0.05540033527580426, "grad_norm": 0.15736587073466737, "learning_rate": 1.9966220338671884e-05, "loss": 0.3753, "step": 694 }, { "epoch": 0.055480162848247784, "grad_norm": 0.18209425955571362, "learning_rate": 1.9966007674562305e-05, "loss": 0.3258, "step": 695 }, { "epoch": 0.055559990420691305, "grad_norm": 0.18019663998505658, "learning_rate": 1.996579434426456e-05, "loss": 0.325, "step": 696 }, { "epoch": 0.055639817993134826, "grad_norm": 0.1944920948749921, "learning_rate": 1.9965580347792893e-05, "loss": 0.3391, "step": 697 }, { "epoch": 0.05571964556557835, "grad_norm": 0.1679447188109589, "learning_rate": 1.9965365685161625e-05, "loss": 0.3024, "step": 698 }, { "epoch": 0.055799473138021874, "grad_norm": 0.15745290773691079, "learning_rate": 1.9965150356385094e-05, "loss": 0.2974, "step": 699 }, { "epoch": 0.055879300710465395, "grad_norm": 0.16332826702159742, "learning_rate": 1.9964934361477703e-05, "loss": 0.3447, "step": 700 }, { "epoch": 0.055959128282908915, "grad_norm": 0.15648903803970723, "learning_rate": 1.996471770045388e-05, "loss": 0.3433, "step": 701 }, { "epoch": 0.056038955855352436, "grad_norm": 0.17242401627129067, "learning_rate": 1.9964500373328116e-05, "loss": 0.2835, "step": 702 }, { "epoch": 0.056118783427795964, "grad_norm": 0.16154495608071012, "learning_rate": 1.996428238011493e-05, "loss": 0.3435, "step": 703 }, { "epoch": 0.056198611000239485, "grad_norm": 0.16664944089945724, "learning_rate": 1.9964063720828907e-05, "loss": 0.3203, "step": 704 }, { "epoch": 0.056278438572683005, "grad_norm": 0.18448347786572597, "learning_rate": 1.9963844395484654e-05, "loss": 0.348, "step": 705 }, { "epoch": 0.056358266145126526, "grad_norm": 0.17374609188062615, "learning_rate": 1.9963624404096836e-05, "loss": 0.344, "step": 706 }, { "epoch": 0.05643809371757005, "grad_norm": 0.16742928439062849, "learning_rate": 1.9963403746680154e-05, "loss": 0.3168, "step": 707 }, { "epoch": 0.05651792129001357, "grad_norm": 0.15737564649460467, "learning_rate": 1.9963182423249363e-05, "loss": 0.2924, "step": 708 }, { "epoch": 0.056597748862457095, "grad_norm": 0.1573781496325615, "learning_rate": 1.9962960433819253e-05, "loss": 0.2785, "step": 709 }, { "epoch": 0.056677576434900616, "grad_norm": 0.19140483503267294, "learning_rate": 1.996273777840467e-05, "loss": 0.3019, "step": 710 }, { "epoch": 0.05675740400734414, "grad_norm": 0.18635773603820993, "learning_rate": 1.9962514457020488e-05, "loss": 0.3661, "step": 711 }, { "epoch": 0.05683723157978766, "grad_norm": 0.17502182183717638, "learning_rate": 1.9962290469681642e-05, "loss": 0.3413, "step": 712 }, { "epoch": 0.05691705915223118, "grad_norm": 0.1663652667793975, "learning_rate": 1.9962065816403105e-05, "loss": 0.366, "step": 713 }, { "epoch": 0.056996886724674706, "grad_norm": 0.1680430257498401, "learning_rate": 1.9961840497199892e-05, "loss": 0.3173, "step": 714 }, { "epoch": 0.057076714297118226, "grad_norm": 0.16038811120203983, "learning_rate": 1.9961614512087064e-05, "loss": 0.3171, "step": 715 }, { "epoch": 0.05715654186956175, "grad_norm": 0.15129550888985857, "learning_rate": 1.9961387861079728e-05, "loss": 0.327, "step": 716 }, { "epoch": 0.05723636944200527, "grad_norm": 0.1671440016857171, "learning_rate": 1.9961160544193036e-05, "loss": 0.3245, "step": 717 }, { "epoch": 0.05731619701444879, "grad_norm": 0.15425894206128182, "learning_rate": 1.996093256144218e-05, "loss": 0.3215, "step": 718 }, { "epoch": 0.05739602458689231, "grad_norm": 0.17646904768983473, "learning_rate": 1.9960703912842405e-05, "loss": 0.3078, "step": 719 }, { "epoch": 0.05747585215933584, "grad_norm": 0.16234469606319418, "learning_rate": 1.996047459840899e-05, "loss": 0.3099, "step": 720 }, { "epoch": 0.05755567973177936, "grad_norm": 0.15795001323074623, "learning_rate": 1.9960244618157265e-05, "loss": 0.3359, "step": 721 }, { "epoch": 0.05763550730422288, "grad_norm": 0.15862590820952635, "learning_rate": 1.9960013972102608e-05, "loss": 0.2827, "step": 722 }, { "epoch": 0.0577153348766664, "grad_norm": 0.16210129852658953, "learning_rate": 1.995978266026043e-05, "loss": 0.3152, "step": 723 }, { "epoch": 0.05779516244910992, "grad_norm": 0.15090732333963294, "learning_rate": 1.9959550682646195e-05, "loss": 0.3509, "step": 724 }, { "epoch": 0.05787499002155345, "grad_norm": 0.15434531686762482, "learning_rate": 1.9959318039275413e-05, "loss": 0.3311, "step": 725 }, { "epoch": 0.05795481759399697, "grad_norm": 0.14898108232420396, "learning_rate": 1.9959084730163637e-05, "loss": 0.317, "step": 726 }, { "epoch": 0.05803464516644049, "grad_norm": 0.18348739952099472, "learning_rate": 1.995885075532645e-05, "loss": 0.3101, "step": 727 }, { "epoch": 0.05811447273888401, "grad_norm": 0.16656947118038667, "learning_rate": 1.9958616114779506e-05, "loss": 0.3169, "step": 728 }, { "epoch": 0.05819430031132753, "grad_norm": 0.16560172424370473, "learning_rate": 1.995838080853849e-05, "loss": 0.3384, "step": 729 }, { "epoch": 0.05827412788377105, "grad_norm": 0.18158714240769774, "learning_rate": 1.9958144836619123e-05, "loss": 0.2778, "step": 730 }, { "epoch": 0.05835395545621458, "grad_norm": 0.15987933087526987, "learning_rate": 1.995790819903718e-05, "loss": 0.3366, "step": 731 }, { "epoch": 0.0584337830286581, "grad_norm": 0.1583263869587549, "learning_rate": 1.995767089580848e-05, "loss": 0.3155, "step": 732 }, { "epoch": 0.05851361060110162, "grad_norm": 0.16312134046162424, "learning_rate": 1.9957432926948894e-05, "loss": 0.339, "step": 733 }, { "epoch": 0.05859343817354514, "grad_norm": 0.1634482336436392, "learning_rate": 1.9957194292474317e-05, "loss": 0.3284, "step": 734 }, { "epoch": 0.05867326574598866, "grad_norm": 0.18552813410329555, "learning_rate": 1.9956954992400707e-05, "loss": 0.321, "step": 735 }, { "epoch": 0.05875309331843219, "grad_norm": 0.16381796183241326, "learning_rate": 1.995671502674406e-05, "loss": 0.3618, "step": 736 }, { "epoch": 0.05883292089087571, "grad_norm": 0.17150840999580888, "learning_rate": 1.995647439552042e-05, "loss": 0.3185, "step": 737 }, { "epoch": 0.05891274846331923, "grad_norm": 0.17014703307865267, "learning_rate": 1.995623309874587e-05, "loss": 0.3001, "step": 738 }, { "epoch": 0.05899257603576275, "grad_norm": 0.19222232599536135, "learning_rate": 1.995599113643653e-05, "loss": 0.3484, "step": 739 }, { "epoch": 0.05907240360820627, "grad_norm": 0.1706539317898475, "learning_rate": 1.9955748508608588e-05, "loss": 0.349, "step": 740 }, { "epoch": 0.05915223118064979, "grad_norm": 0.17012731101172254, "learning_rate": 1.9955505215278258e-05, "loss": 0.3455, "step": 741 }, { "epoch": 0.05923205875309332, "grad_norm": 0.17675813955613504, "learning_rate": 1.99552612564618e-05, "loss": 0.3057, "step": 742 }, { "epoch": 0.05931188632553684, "grad_norm": 0.16762789141410755, "learning_rate": 1.995501663217552e-05, "loss": 0.3207, "step": 743 }, { "epoch": 0.05939171389798036, "grad_norm": 0.17166262460519852, "learning_rate": 1.9954771342435784e-05, "loss": 0.3425, "step": 744 }, { "epoch": 0.05947154147042388, "grad_norm": 0.1698928072667564, "learning_rate": 1.9954525387258973e-05, "loss": 0.279, "step": 745 }, { "epoch": 0.059551369042867404, "grad_norm": 0.19196537751447512, "learning_rate": 1.9954278766661534e-05, "loss": 0.3061, "step": 746 }, { "epoch": 0.05963119661531093, "grad_norm": 0.17658766911919976, "learning_rate": 1.9954031480659956e-05, "loss": 0.3072, "step": 747 }, { "epoch": 0.05971102418775445, "grad_norm": 0.16162035938049174, "learning_rate": 1.9953783529270762e-05, "loss": 0.3826, "step": 748 }, { "epoch": 0.05979085176019797, "grad_norm": 0.18579868410928216, "learning_rate": 1.9953534912510533e-05, "loss": 0.3452, "step": 749 }, { "epoch": 0.059870679332641494, "grad_norm": 0.1900208156311031, "learning_rate": 1.9953285630395885e-05, "loss": 0.3026, "step": 750 }, { "epoch": 0.059950506905085015, "grad_norm": 0.2173629710314972, "learning_rate": 1.995303568294348e-05, "loss": 0.3435, "step": 751 }, { "epoch": 0.060030334477528535, "grad_norm": 0.17703833702263413, "learning_rate": 1.995278507017003e-05, "loss": 0.332, "step": 752 }, { "epoch": 0.06011016204997206, "grad_norm": 0.18321911559418352, "learning_rate": 1.9952533792092286e-05, "loss": 0.2892, "step": 753 }, { "epoch": 0.060189989622415584, "grad_norm": 0.18419400053235455, "learning_rate": 1.9952281848727044e-05, "loss": 0.3747, "step": 754 }, { "epoch": 0.060269817194859104, "grad_norm": 0.17470534927092832, "learning_rate": 1.9952029240091144e-05, "loss": 0.3525, "step": 755 }, { "epoch": 0.060349644767302625, "grad_norm": 0.20184232925060763, "learning_rate": 1.9951775966201477e-05, "loss": 0.3196, "step": 756 }, { "epoch": 0.060429472339746146, "grad_norm": 0.1676999288463538, "learning_rate": 1.9951522027074968e-05, "loss": 0.3293, "step": 757 }, { "epoch": 0.060509299912189674, "grad_norm": 0.18041267988973494, "learning_rate": 1.9951267422728594e-05, "loss": 0.2951, "step": 758 }, { "epoch": 0.060589127484633194, "grad_norm": 0.17572310973530023, "learning_rate": 1.995101215317937e-05, "loss": 0.3531, "step": 759 }, { "epoch": 0.060668955057076715, "grad_norm": 0.20766529255040125, "learning_rate": 1.9950756218444368e-05, "loss": 0.3411, "step": 760 }, { "epoch": 0.060748782629520236, "grad_norm": 0.22690268409782158, "learning_rate": 1.9950499618540692e-05, "loss": 0.3383, "step": 761 }, { "epoch": 0.060828610201963756, "grad_norm": 0.18262298229026833, "learning_rate": 1.9950242353485496e-05, "loss": 0.2608, "step": 762 }, { "epoch": 0.06090843777440728, "grad_norm": 0.17533352882527742, "learning_rate": 1.9949984423295977e-05, "loss": 0.2991, "step": 763 }, { "epoch": 0.060988265346850805, "grad_norm": 0.17561310737501215, "learning_rate": 1.994972582798937e-05, "loss": 0.34, "step": 764 }, { "epoch": 0.061068092919294326, "grad_norm": 0.17384288236293516, "learning_rate": 1.9949466567582974e-05, "loss": 0.319, "step": 765 }, { "epoch": 0.061147920491737846, "grad_norm": 0.17422927766766583, "learning_rate": 1.9949206642094107e-05, "loss": 0.2795, "step": 766 }, { "epoch": 0.06122774806418137, "grad_norm": 0.18844547610723095, "learning_rate": 1.994894605154015e-05, "loss": 0.304, "step": 767 }, { "epoch": 0.06130757563662489, "grad_norm": 0.16473506513467906, "learning_rate": 1.994868479593852e-05, "loss": 0.3097, "step": 768 }, { "epoch": 0.061387403209068415, "grad_norm": 0.15740922720625616, "learning_rate": 1.9948422875306684e-05, "loss": 0.3466, "step": 769 }, { "epoch": 0.061467230781511936, "grad_norm": 0.1704989209507051, "learning_rate": 1.9948160289662155e-05, "loss": 0.3301, "step": 770 }, { "epoch": 0.06154705835395546, "grad_norm": 0.16229294883327486, "learning_rate": 1.9947897039022473e-05, "loss": 0.3192, "step": 771 }, { "epoch": 0.06162688592639898, "grad_norm": 0.18277416158380055, "learning_rate": 1.9947633123405242e-05, "loss": 0.3432, "step": 772 }, { "epoch": 0.0617067134988425, "grad_norm": 0.177612448144149, "learning_rate": 1.9947368542828105e-05, "loss": 0.2793, "step": 773 }, { "epoch": 0.06178654107128602, "grad_norm": 0.18065013250334763, "learning_rate": 1.9947103297308746e-05, "loss": 0.332, "step": 774 }, { "epoch": 0.06186636864372955, "grad_norm": 0.1585312350679272, "learning_rate": 1.9946837386864895e-05, "loss": 0.2836, "step": 775 }, { "epoch": 0.06194619621617307, "grad_norm": 0.17409027303161828, "learning_rate": 1.9946570811514333e-05, "loss": 0.3502, "step": 776 }, { "epoch": 0.06202602378861659, "grad_norm": 0.18074938859249814, "learning_rate": 1.9946303571274873e-05, "loss": 0.3203, "step": 777 }, { "epoch": 0.06210585136106011, "grad_norm": 0.16987375125146828, "learning_rate": 1.994603566616438e-05, "loss": 0.3031, "step": 778 }, { "epoch": 0.06218567893350363, "grad_norm": 0.181587329485089, "learning_rate": 1.9945767096200765e-05, "loss": 0.2845, "step": 779 }, { "epoch": 0.06226550650594716, "grad_norm": 0.1653334647102969, "learning_rate": 1.994549786140198e-05, "loss": 0.3162, "step": 780 }, { "epoch": 0.06234533407839068, "grad_norm": 0.17061568598723778, "learning_rate": 1.9945227961786017e-05, "loss": 0.3359, "step": 781 }, { "epoch": 0.0624251616508342, "grad_norm": 0.1618321804606529, "learning_rate": 1.9944957397370927e-05, "loss": 0.3706, "step": 782 }, { "epoch": 0.06250498922327773, "grad_norm": 0.2098165184833741, "learning_rate": 1.994468616817479e-05, "loss": 0.3146, "step": 783 }, { "epoch": 0.06258481679572124, "grad_norm": 0.18695787285696058, "learning_rate": 1.9944414274215736e-05, "loss": 0.3129, "step": 784 }, { "epoch": 0.06266464436816477, "grad_norm": 0.17745747731850087, "learning_rate": 1.9944141715511946e-05, "loss": 0.3028, "step": 785 }, { "epoch": 0.06274447194060828, "grad_norm": 0.17485645186445165, "learning_rate": 1.9943868492081632e-05, "loss": 0.3074, "step": 786 }, { "epoch": 0.06282429951305181, "grad_norm": 0.1726974379157718, "learning_rate": 1.994359460394306e-05, "loss": 0.2874, "step": 787 }, { "epoch": 0.06290412708549532, "grad_norm": 0.1816437383000171, "learning_rate": 1.9943320051114546e-05, "loss": 0.3196, "step": 788 }, { "epoch": 0.06298395465793885, "grad_norm": 0.16739766737118006, "learning_rate": 1.994304483361443e-05, "loss": 0.2874, "step": 789 }, { "epoch": 0.06306378223038238, "grad_norm": 0.16218938681131861, "learning_rate": 1.994276895146112e-05, "loss": 0.2908, "step": 790 }, { "epoch": 0.06314360980282589, "grad_norm": 0.19804660821666792, "learning_rate": 1.994249240467305e-05, "loss": 0.3062, "step": 791 }, { "epoch": 0.06322343737526942, "grad_norm": 0.19117764964225425, "learning_rate": 1.994221519326871e-05, "loss": 0.2954, "step": 792 }, { "epoch": 0.06330326494771293, "grad_norm": 0.18226794894833537, "learning_rate": 1.994193731726663e-05, "loss": 0.364, "step": 793 }, { "epoch": 0.06338309252015646, "grad_norm": 0.2015966271240988, "learning_rate": 1.9941658776685385e-05, "loss": 0.3172, "step": 794 }, { "epoch": 0.06346292009259999, "grad_norm": 0.18363152530744306, "learning_rate": 1.9941379571543597e-05, "loss": 0.2639, "step": 795 }, { "epoch": 0.0635427476650435, "grad_norm": 0.1762956430696276, "learning_rate": 1.9941099701859925e-05, "loss": 0.31, "step": 796 }, { "epoch": 0.06362257523748703, "grad_norm": 0.17023946067766907, "learning_rate": 1.9940819167653078e-05, "loss": 0.3294, "step": 797 }, { "epoch": 0.06370240280993054, "grad_norm": 0.18310800697942553, "learning_rate": 1.994053796894181e-05, "loss": 0.3126, "step": 798 }, { "epoch": 0.06378223038237407, "grad_norm": 0.19275357325365303, "learning_rate": 1.994025610574492e-05, "loss": 0.3192, "step": 799 }, { "epoch": 0.0638620579548176, "grad_norm": 0.1858455579304735, "learning_rate": 1.9939973578081245e-05, "loss": 0.2833, "step": 800 }, { "epoch": 0.06394188552726111, "grad_norm": 0.19713439382209522, "learning_rate": 1.9939690385969676e-05, "loss": 0.295, "step": 801 }, { "epoch": 0.06402171309970464, "grad_norm": 0.1573810227028926, "learning_rate": 1.9939406529429135e-05, "loss": 0.2953, "step": 802 }, { "epoch": 0.06410154067214816, "grad_norm": 0.1593400875646669, "learning_rate": 1.9939122008478604e-05, "loss": 0.2849, "step": 803 }, { "epoch": 0.06418136824459168, "grad_norm": 0.19843845337106142, "learning_rate": 1.99388368231371e-05, "loss": 0.3447, "step": 804 }, { "epoch": 0.06426119581703521, "grad_norm": 0.18597181956388062, "learning_rate": 1.9938550973423686e-05, "loss": 0.3145, "step": 805 }, { "epoch": 0.06434102338947872, "grad_norm": 0.18408854885335413, "learning_rate": 1.9938264459357473e-05, "loss": 0.2741, "step": 806 }, { "epoch": 0.06442085096192225, "grad_norm": 0.17102718080582036, "learning_rate": 1.993797728095761e-05, "loss": 0.2899, "step": 807 }, { "epoch": 0.06450067853436577, "grad_norm": 0.17622508955219854, "learning_rate": 1.9937689438243294e-05, "loss": 0.2887, "step": 808 }, { "epoch": 0.0645805061068093, "grad_norm": 0.18713764307069844, "learning_rate": 1.9937400931233765e-05, "loss": 0.3082, "step": 809 }, { "epoch": 0.06466033367925281, "grad_norm": 0.16998061208394843, "learning_rate": 1.993711175994831e-05, "loss": 0.3215, "step": 810 }, { "epoch": 0.06474016125169633, "grad_norm": 0.16342717323888456, "learning_rate": 1.9936821924406262e-05, "loss": 0.3009, "step": 811 }, { "epoch": 0.06481998882413986, "grad_norm": 0.20616618227649594, "learning_rate": 1.9936531424626994e-05, "loss": 0.3023, "step": 812 }, { "epoch": 0.06489981639658338, "grad_norm": 0.21380306605162247, "learning_rate": 1.993624026062992e-05, "loss": 0.29, "step": 813 }, { "epoch": 0.0649796439690269, "grad_norm": 0.17252577404120414, "learning_rate": 1.9935948432434505e-05, "loss": 0.3518, "step": 814 }, { "epoch": 0.06505947154147042, "grad_norm": 0.17479595185970304, "learning_rate": 1.993565594006026e-05, "loss": 0.2858, "step": 815 }, { "epoch": 0.06513929911391395, "grad_norm": 0.20371541911229957, "learning_rate": 1.993536278352674e-05, "loss": 0.2713, "step": 816 }, { "epoch": 0.06521912668635747, "grad_norm": 0.1897465217916296, "learning_rate": 1.9935068962853526e-05, "loss": 0.3096, "step": 817 }, { "epoch": 0.06529895425880099, "grad_norm": 0.16848595404307448, "learning_rate": 1.9934774478060276e-05, "loss": 0.2876, "step": 818 }, { "epoch": 0.06537878183124451, "grad_norm": 0.18882883530099415, "learning_rate": 1.9934479329166666e-05, "loss": 0.3088, "step": 819 }, { "epoch": 0.06545860940368803, "grad_norm": 0.18286528378908987, "learning_rate": 1.993418351619243e-05, "loss": 0.2829, "step": 820 }, { "epoch": 0.06553843697613156, "grad_norm": 0.1818308619544585, "learning_rate": 1.9933887039157337e-05, "loss": 0.3063, "step": 821 }, { "epoch": 0.06561826454857508, "grad_norm": 0.17259296385606024, "learning_rate": 1.9933589898081212e-05, "loss": 0.2845, "step": 822 }, { "epoch": 0.0656980921210186, "grad_norm": 0.2107559223829382, "learning_rate": 1.9933292092983908e-05, "loss": 0.3283, "step": 823 }, { "epoch": 0.06577791969346213, "grad_norm": 0.1777545534677515, "learning_rate": 1.9932993623885344e-05, "loss": 0.3075, "step": 824 }, { "epoch": 0.06585774726590564, "grad_norm": 0.15843764127961876, "learning_rate": 1.993269449080546e-05, "loss": 0.2687, "step": 825 }, { "epoch": 0.06593757483834917, "grad_norm": 0.170086685819695, "learning_rate": 1.993239469376426e-05, "loss": 0.3313, "step": 826 }, { "epoch": 0.0660174024107927, "grad_norm": 0.20802745906673767, "learning_rate": 1.9932094232781783e-05, "loss": 0.275, "step": 827 }, { "epoch": 0.06609722998323621, "grad_norm": 0.19306360220519256, "learning_rate": 1.9931793107878108e-05, "loss": 0.2814, "step": 828 }, { "epoch": 0.06617705755567974, "grad_norm": 0.1726417059573894, "learning_rate": 1.993149131907337e-05, "loss": 0.3427, "step": 829 }, { "epoch": 0.06625688512812325, "grad_norm": 0.19811346724914175, "learning_rate": 1.9931188866387742e-05, "loss": 0.3171, "step": 830 }, { "epoch": 0.06633671270056678, "grad_norm": 0.17383757980530176, "learning_rate": 1.9930885749841437e-05, "loss": 0.296, "step": 831 }, { "epoch": 0.06641654027301029, "grad_norm": 0.17812262615534638, "learning_rate": 1.993058196945472e-05, "loss": 0.321, "step": 832 }, { "epoch": 0.06649636784545382, "grad_norm": 0.18490283891256912, "learning_rate": 1.9930277525247904e-05, "loss": 0.2906, "step": 833 }, { "epoch": 0.06657619541789735, "grad_norm": 0.17297739631168862, "learning_rate": 1.992997241724133e-05, "loss": 0.2825, "step": 834 }, { "epoch": 0.06665602299034086, "grad_norm": 0.18641990326481037, "learning_rate": 1.9929666645455393e-05, "loss": 0.3012, "step": 835 }, { "epoch": 0.06673585056278439, "grad_norm": 0.20666364724394298, "learning_rate": 1.992936020991054e-05, "loss": 0.3036, "step": 836 }, { "epoch": 0.0668156781352279, "grad_norm": 0.16761199105317454, "learning_rate": 1.9929053110627253e-05, "loss": 0.3252, "step": 837 }, { "epoch": 0.06689550570767143, "grad_norm": 0.188573559874284, "learning_rate": 1.9928745347626056e-05, "loss": 0.3115, "step": 838 }, { "epoch": 0.06697533328011496, "grad_norm": 0.19931101113271837, "learning_rate": 1.9928436920927528e-05, "loss": 0.3174, "step": 839 }, { "epoch": 0.06705516085255847, "grad_norm": 0.16289863981236397, "learning_rate": 1.9928127830552282e-05, "loss": 0.2869, "step": 840 }, { "epoch": 0.067134988425002, "grad_norm": 0.21104739521220744, "learning_rate": 1.992781807652098e-05, "loss": 0.2758, "step": 841 }, { "epoch": 0.06721481599744551, "grad_norm": 0.18951088644800498, "learning_rate": 1.9927507658854327e-05, "loss": 0.2602, "step": 842 }, { "epoch": 0.06729464356988904, "grad_norm": 0.17037460910226918, "learning_rate": 1.9927196577573075e-05, "loss": 0.3348, "step": 843 }, { "epoch": 0.06737447114233257, "grad_norm": 0.19203426076846178, "learning_rate": 1.9926884832698016e-05, "loss": 0.3086, "step": 844 }, { "epoch": 0.06745429871477608, "grad_norm": 0.19633290527005964, "learning_rate": 1.9926572424249994e-05, "loss": 0.3223, "step": 845 }, { "epoch": 0.06753412628721961, "grad_norm": 0.16498188605681602, "learning_rate": 1.9926259352249887e-05, "loss": 0.3125, "step": 846 }, { "epoch": 0.06761395385966312, "grad_norm": 0.16474702709495928, "learning_rate": 1.9925945616718626e-05, "loss": 0.3353, "step": 847 }, { "epoch": 0.06769378143210665, "grad_norm": 0.16951563966762545, "learning_rate": 1.992563121767718e-05, "loss": 0.3072, "step": 848 }, { "epoch": 0.06777360900455018, "grad_norm": 0.17379128244762834, "learning_rate": 1.992531615514657e-05, "loss": 0.2887, "step": 849 }, { "epoch": 0.06785343657699369, "grad_norm": 0.19846312432823796, "learning_rate": 1.992500042914785e-05, "loss": 0.2979, "step": 850 }, { "epoch": 0.06793326414943722, "grad_norm": 0.1891273184297098, "learning_rate": 1.992468403970213e-05, "loss": 0.306, "step": 851 }, { "epoch": 0.06801309172188073, "grad_norm": 0.23454060510287292, "learning_rate": 1.992436698683056e-05, "loss": 0.2925, "step": 852 }, { "epoch": 0.06809291929432426, "grad_norm": 0.2177807014472457, "learning_rate": 1.992404927055433e-05, "loss": 0.2774, "step": 853 }, { "epoch": 0.06817274686676777, "grad_norm": 0.1819931518834644, "learning_rate": 1.992373089089468e-05, "loss": 0.29, "step": 854 }, { "epoch": 0.0682525744392113, "grad_norm": 0.2210299001156096, "learning_rate": 1.9923411847872895e-05, "loss": 0.2644, "step": 855 }, { "epoch": 0.06833240201165483, "grad_norm": 0.18074571363264774, "learning_rate": 1.9923092141510295e-05, "loss": 0.3321, "step": 856 }, { "epoch": 0.06841222958409834, "grad_norm": 0.17629166602429394, "learning_rate": 1.992277177182826e-05, "loss": 0.3058, "step": 857 }, { "epoch": 0.06849205715654187, "grad_norm": 0.16536956744830816, "learning_rate": 1.99224507388482e-05, "loss": 0.3433, "step": 858 }, { "epoch": 0.06857188472898539, "grad_norm": 0.17537515700159187, "learning_rate": 1.992212904259157e-05, "loss": 0.3402, "step": 859 }, { "epoch": 0.06865171230142891, "grad_norm": 0.19631528612416108, "learning_rate": 1.9921806683079884e-05, "loss": 0.325, "step": 860 }, { "epoch": 0.06873153987387244, "grad_norm": 0.25443393365090156, "learning_rate": 1.9921483660334684e-05, "loss": 0.2438, "step": 861 }, { "epoch": 0.06881136744631595, "grad_norm": 0.18290664699386025, "learning_rate": 1.9921159974377567e-05, "loss": 0.3051, "step": 862 }, { "epoch": 0.06889119501875948, "grad_norm": 0.2385903074640058, "learning_rate": 1.9920835625230167e-05, "loss": 0.2663, "step": 863 }, { "epoch": 0.068971022591203, "grad_norm": 0.22221843352262347, "learning_rate": 1.9920510612914166e-05, "loss": 0.325, "step": 864 }, { "epoch": 0.06905085016364652, "grad_norm": 0.1905446456991734, "learning_rate": 1.992018493745129e-05, "loss": 0.3203, "step": 865 }, { "epoch": 0.06913067773609005, "grad_norm": 0.1773131740389162, "learning_rate": 1.991985859886331e-05, "loss": 0.3416, "step": 866 }, { "epoch": 0.06921050530853357, "grad_norm": 0.17545920259234815, "learning_rate": 1.9919531597172037e-05, "loss": 0.2974, "step": 867 }, { "epoch": 0.06929033288097709, "grad_norm": 0.20605287121630403, "learning_rate": 1.9919203932399335e-05, "loss": 0.3066, "step": 868 }, { "epoch": 0.0693701604534206, "grad_norm": 0.19912474376258768, "learning_rate": 1.9918875604567105e-05, "loss": 0.2602, "step": 869 }, { "epoch": 0.06944998802586413, "grad_norm": 0.18658709162987686, "learning_rate": 1.9918546613697293e-05, "loss": 0.2655, "step": 870 }, { "epoch": 0.06952981559830766, "grad_norm": 0.18742240720709966, "learning_rate": 1.991821695981189e-05, "loss": 0.2809, "step": 871 }, { "epoch": 0.06960964317075118, "grad_norm": 0.2036610672001906, "learning_rate": 1.991788664293294e-05, "loss": 0.3491, "step": 872 }, { "epoch": 0.0696894707431947, "grad_norm": 0.18204742119194176, "learning_rate": 1.991755566308251e-05, "loss": 0.2509, "step": 873 }, { "epoch": 0.06976929831563822, "grad_norm": 0.1826771131212604, "learning_rate": 1.9917224020282738e-05, "loss": 0.2694, "step": 874 }, { "epoch": 0.06984912588808175, "grad_norm": 0.25543173040936523, "learning_rate": 1.9916891714555782e-05, "loss": 0.3049, "step": 875 }, { "epoch": 0.06992895346052526, "grad_norm": 0.17930620320157817, "learning_rate": 1.9916558745923865e-05, "loss": 0.3078, "step": 876 }, { "epoch": 0.07000878103296879, "grad_norm": 0.18549310062860538, "learning_rate": 1.9916225114409234e-05, "loss": 0.3573, "step": 877 }, { "epoch": 0.07008860860541231, "grad_norm": 0.20477138400615305, "learning_rate": 1.9915890820034202e-05, "loss": 0.276, "step": 878 }, { "epoch": 0.07016843617785583, "grad_norm": 0.18245711735024545, "learning_rate": 1.9915555862821106e-05, "loss": 0.3163, "step": 879 }, { "epoch": 0.07024826375029936, "grad_norm": 0.16088405671034747, "learning_rate": 1.991522024279234e-05, "loss": 0.2896, "step": 880 }, { "epoch": 0.07032809132274287, "grad_norm": 0.1918606570649937, "learning_rate": 1.9914883959970342e-05, "loss": 0.32, "step": 881 }, { "epoch": 0.0704079188951864, "grad_norm": 0.17881208689515768, "learning_rate": 1.991454701437759e-05, "loss": 0.293, "step": 882 }, { "epoch": 0.07048774646762992, "grad_norm": 0.202366039758863, "learning_rate": 1.9914209406036605e-05, "loss": 0.3147, "step": 883 }, { "epoch": 0.07056757404007344, "grad_norm": 0.24507834831642047, "learning_rate": 1.9913871134969957e-05, "loss": 0.3023, "step": 884 }, { "epoch": 0.07064740161251697, "grad_norm": 0.2078482459926984, "learning_rate": 1.9913532201200255e-05, "loss": 0.2658, "step": 885 }, { "epoch": 0.07072722918496048, "grad_norm": 0.21412137868647427, "learning_rate": 1.9913192604750156e-05, "loss": 0.2711, "step": 886 }, { "epoch": 0.07080705675740401, "grad_norm": 0.19240352870965147, "learning_rate": 1.9912852345642368e-05, "loss": 0.2966, "step": 887 }, { "epoch": 0.07088688432984754, "grad_norm": 0.17267205966493912, "learning_rate": 1.9912511423899623e-05, "loss": 0.316, "step": 888 }, { "epoch": 0.07096671190229105, "grad_norm": 0.1924804817742059, "learning_rate": 1.991216983954472e-05, "loss": 0.2654, "step": 889 }, { "epoch": 0.07104653947473458, "grad_norm": 0.19978687786393637, "learning_rate": 1.991182759260049e-05, "loss": 0.2965, "step": 890 }, { "epoch": 0.07112636704717809, "grad_norm": 0.16237888713145773, "learning_rate": 1.9911484683089812e-05, "loss": 0.2581, "step": 891 }, { "epoch": 0.07120619461962162, "grad_norm": 0.17348409239746823, "learning_rate": 1.9911141111035606e-05, "loss": 0.297, "step": 892 }, { "epoch": 0.07128602219206515, "grad_norm": 0.17646136221037304, "learning_rate": 1.991079687646084e-05, "loss": 0.301, "step": 893 }, { "epoch": 0.07136584976450866, "grad_norm": 0.21816251272735157, "learning_rate": 1.9910451979388524e-05, "loss": 0.2942, "step": 894 }, { "epoch": 0.07144567733695219, "grad_norm": 0.218194090997414, "learning_rate": 1.991010641984171e-05, "loss": 0.3088, "step": 895 }, { "epoch": 0.0715255049093957, "grad_norm": 0.17795973230294712, "learning_rate": 1.9909760197843502e-05, "loss": 0.3089, "step": 896 }, { "epoch": 0.07160533248183923, "grad_norm": 0.2104648326315149, "learning_rate": 1.9909413313417042e-05, "loss": 0.3044, "step": 897 }, { "epoch": 0.07168516005428274, "grad_norm": 0.2458086538629545, "learning_rate": 1.9909065766585522e-05, "loss": 0.3036, "step": 898 }, { "epoch": 0.07176498762672627, "grad_norm": 0.16231762590237853, "learning_rate": 1.9908717557372163e-05, "loss": 0.2967, "step": 899 }, { "epoch": 0.0718448151991698, "grad_norm": 0.18173733099516667, "learning_rate": 1.9908368685800253e-05, "loss": 0.2766, "step": 900 }, { "epoch": 0.07192464277161331, "grad_norm": 0.19516416768764366, "learning_rate": 1.9908019151893104e-05, "loss": 0.3506, "step": 901 }, { "epoch": 0.07200447034405684, "grad_norm": 0.230902282792098, "learning_rate": 1.9907668955674086e-05, "loss": 0.296, "step": 902 }, { "epoch": 0.07208429791650035, "grad_norm": 0.25031732997297595, "learning_rate": 1.990731809716661e-05, "loss": 0.2967, "step": 903 }, { "epoch": 0.07216412548894388, "grad_norm": 0.18027686643782323, "learning_rate": 1.9906966576394124e-05, "loss": 0.304, "step": 904 }, { "epoch": 0.07224395306138741, "grad_norm": 0.20256296459439835, "learning_rate": 1.990661439338013e-05, "loss": 0.321, "step": 905 }, { "epoch": 0.07232378063383092, "grad_norm": 0.2080709462761502, "learning_rate": 1.990626154814817e-05, "loss": 0.2842, "step": 906 }, { "epoch": 0.07240360820627445, "grad_norm": 0.1853905081953601, "learning_rate": 1.9905908040721824e-05, "loss": 0.2816, "step": 907 }, { "epoch": 0.07248343577871796, "grad_norm": 0.20066527847520862, "learning_rate": 1.990555387112473e-05, "loss": 0.2894, "step": 908 }, { "epoch": 0.07256326335116149, "grad_norm": 0.22282438061439744, "learning_rate": 1.9905199039380562e-05, "loss": 0.2924, "step": 909 }, { "epoch": 0.07264309092360502, "grad_norm": 0.17512267644372936, "learning_rate": 1.9904843545513034e-05, "loss": 0.3304, "step": 910 }, { "epoch": 0.07272291849604853, "grad_norm": 0.19625578843584204, "learning_rate": 1.9904487389545912e-05, "loss": 0.2589, "step": 911 }, { "epoch": 0.07280274606849206, "grad_norm": 0.20505208123255045, "learning_rate": 1.990413057150301e-05, "loss": 0.2426, "step": 912 }, { "epoch": 0.07288257364093557, "grad_norm": 0.1783743419432154, "learning_rate": 1.9903773091408167e-05, "loss": 0.2952, "step": 913 }, { "epoch": 0.0729624012133791, "grad_norm": 0.18152492249737334, "learning_rate": 1.990341494928529e-05, "loss": 0.2405, "step": 914 }, { "epoch": 0.07304222878582263, "grad_norm": 0.213755388558285, "learning_rate": 1.9903056145158315e-05, "loss": 0.2984, "step": 915 }, { "epoch": 0.07312205635826614, "grad_norm": 0.22191421297555974, "learning_rate": 1.9902696679051227e-05, "loss": 0.2356, "step": 916 }, { "epoch": 0.07320188393070967, "grad_norm": 0.1852167850487188, "learning_rate": 1.9902336550988054e-05, "loss": 0.2637, "step": 917 }, { "epoch": 0.07328171150315319, "grad_norm": 0.21011266247906116, "learning_rate": 1.9901975760992874e-05, "loss": 0.2902, "step": 918 }, { "epoch": 0.07336153907559671, "grad_norm": 0.2046310326077274, "learning_rate": 1.9901614309089798e-05, "loss": 0.2613, "step": 919 }, { "epoch": 0.07344136664804023, "grad_norm": 0.19946844638646083, "learning_rate": 1.9901252195302993e-05, "loss": 0.3271, "step": 920 }, { "epoch": 0.07352119422048375, "grad_norm": 0.20476694075987487, "learning_rate": 1.9900889419656657e-05, "loss": 0.2358, "step": 921 }, { "epoch": 0.07360102179292728, "grad_norm": 0.19651703383455868, "learning_rate": 1.990052598217505e-05, "loss": 0.3681, "step": 922 }, { "epoch": 0.0736808493653708, "grad_norm": 0.170336374243936, "learning_rate": 1.990016188288246e-05, "loss": 0.3113, "step": 923 }, { "epoch": 0.07376067693781432, "grad_norm": 0.26714466836800305, "learning_rate": 1.9899797121803227e-05, "loss": 0.3441, "step": 924 }, { "epoch": 0.07384050451025784, "grad_norm": 0.23027330556345754, "learning_rate": 1.9899431698961735e-05, "loss": 0.3041, "step": 925 }, { "epoch": 0.07392033208270136, "grad_norm": 0.21658516980372186, "learning_rate": 1.989906561438241e-05, "loss": 0.3248, "step": 926 }, { "epoch": 0.07400015965514489, "grad_norm": 0.19208055882704642, "learning_rate": 1.9898698868089723e-05, "loss": 0.3254, "step": 927 }, { "epoch": 0.0740799872275884, "grad_norm": 0.17718974972002335, "learning_rate": 1.989833146010819e-05, "loss": 0.2739, "step": 928 }, { "epoch": 0.07415981480003193, "grad_norm": 0.17783275843901042, "learning_rate": 1.989796339046237e-05, "loss": 0.2833, "step": 929 }, { "epoch": 0.07423964237247545, "grad_norm": 0.1652224275517474, "learning_rate": 1.989759465917687e-05, "loss": 0.2982, "step": 930 }, { "epoch": 0.07431946994491898, "grad_norm": 0.20220086949600305, "learning_rate": 1.989722526627633e-05, "loss": 0.3465, "step": 931 }, { "epoch": 0.0743992975173625, "grad_norm": 0.1727401325564754, "learning_rate": 1.9896855211785456e-05, "loss": 0.2963, "step": 932 }, { "epoch": 0.07447912508980602, "grad_norm": 0.20996782968816682, "learning_rate": 1.9896484495728975e-05, "loss": 0.3019, "step": 933 }, { "epoch": 0.07455895266224954, "grad_norm": 0.18497049876217475, "learning_rate": 1.9896113118131667e-05, "loss": 0.3153, "step": 934 }, { "epoch": 0.07463878023469306, "grad_norm": 0.19025775137939044, "learning_rate": 1.9895741079018364e-05, "loss": 0.2656, "step": 935 }, { "epoch": 0.07471860780713659, "grad_norm": 0.21375482943571086, "learning_rate": 1.989536837841393e-05, "loss": 0.2826, "step": 936 }, { "epoch": 0.07479843537958011, "grad_norm": 0.1843477474968301, "learning_rate": 1.989499501634328e-05, "loss": 0.2944, "step": 937 }, { "epoch": 0.07487826295202363, "grad_norm": 0.178423391864768, "learning_rate": 1.9894620992831372e-05, "loss": 0.2689, "step": 938 }, { "epoch": 0.07495809052446716, "grad_norm": 0.18964953716816385, "learning_rate": 1.9894246307903208e-05, "loss": 0.3158, "step": 939 }, { "epoch": 0.07503791809691067, "grad_norm": 0.1836277627846682, "learning_rate": 1.989387096158383e-05, "loss": 0.2788, "step": 940 }, { "epoch": 0.0751177456693542, "grad_norm": 0.2158918458205282, "learning_rate": 1.9893494953898336e-05, "loss": 0.2777, "step": 941 }, { "epoch": 0.07519757324179771, "grad_norm": 0.18998891989811018, "learning_rate": 1.989311828487186e-05, "loss": 0.3325, "step": 942 }, { "epoch": 0.07527740081424124, "grad_norm": 0.1786673971801648, "learning_rate": 1.9892740954529573e-05, "loss": 0.3468, "step": 943 }, { "epoch": 0.07535722838668477, "grad_norm": 0.18331951303919705, "learning_rate": 1.9892362962896706e-05, "loss": 0.265, "step": 944 }, { "epoch": 0.07543705595912828, "grad_norm": 0.1881762804953479, "learning_rate": 1.989198430999852e-05, "loss": 0.3635, "step": 945 }, { "epoch": 0.07551688353157181, "grad_norm": 0.17312323257197257, "learning_rate": 1.989160499586033e-05, "loss": 0.3198, "step": 946 }, { "epoch": 0.07559671110401532, "grad_norm": 0.19265414825650773, "learning_rate": 1.9891225020507496e-05, "loss": 0.2919, "step": 947 }, { "epoch": 0.07567653867645885, "grad_norm": 0.25137162940933705, "learning_rate": 1.989084438396541e-05, "loss": 0.2717, "step": 948 }, { "epoch": 0.07575636624890238, "grad_norm": 0.19951749407413252, "learning_rate": 1.989046308625952e-05, "loss": 0.3079, "step": 949 }, { "epoch": 0.07583619382134589, "grad_norm": 0.18208603292814288, "learning_rate": 1.989008112741531e-05, "loss": 0.3077, "step": 950 }, { "epoch": 0.07591602139378942, "grad_norm": 0.17941885734663132, "learning_rate": 1.988969850745832e-05, "loss": 0.2749, "step": 951 }, { "epoch": 0.07599584896623293, "grad_norm": 0.22120511934594486, "learning_rate": 1.988931522641412e-05, "loss": 0.2944, "step": 952 }, { "epoch": 0.07607567653867646, "grad_norm": 0.23029020609034787, "learning_rate": 1.9888931284308333e-05, "loss": 0.284, "step": 953 }, { "epoch": 0.07615550411111999, "grad_norm": 0.1752386739211597, "learning_rate": 1.988854668116663e-05, "loss": 0.2985, "step": 954 }, { "epoch": 0.0762353316835635, "grad_norm": 0.19425785822856728, "learning_rate": 1.988816141701471e-05, "loss": 0.294, "step": 955 }, { "epoch": 0.07631515925600703, "grad_norm": 0.241445331882445, "learning_rate": 1.988777549187833e-05, "loss": 0.3207, "step": 956 }, { "epoch": 0.07639498682845054, "grad_norm": 0.21741498135970586, "learning_rate": 1.988738890578329e-05, "loss": 0.2748, "step": 957 }, { "epoch": 0.07647481440089407, "grad_norm": 0.19578032132691967, "learning_rate": 1.988700165875543e-05, "loss": 0.2779, "step": 958 }, { "epoch": 0.0765546419733376, "grad_norm": 0.2575594020634077, "learning_rate": 1.9886613750820632e-05, "loss": 0.356, "step": 959 }, { "epoch": 0.07663446954578111, "grad_norm": 0.2171797603988687, "learning_rate": 1.9886225182004837e-05, "loss": 0.318, "step": 960 }, { "epoch": 0.07671429711822464, "grad_norm": 0.2598063286928739, "learning_rate": 1.988583595233401e-05, "loss": 0.2712, "step": 961 }, { "epoch": 0.07679412469066815, "grad_norm": 0.19857104504151474, "learning_rate": 1.988544606183417e-05, "loss": 0.3167, "step": 962 }, { "epoch": 0.07687395226311168, "grad_norm": 0.2369052813842022, "learning_rate": 1.9885055510531383e-05, "loss": 0.3165, "step": 963 }, { "epoch": 0.0769537798355552, "grad_norm": 0.20074946573988298, "learning_rate": 1.988466429845176e-05, "loss": 0.3362, "step": 964 }, { "epoch": 0.07703360740799872, "grad_norm": 0.21175351324855418, "learning_rate": 1.9884272425621437e-05, "loss": 0.333, "step": 965 }, { "epoch": 0.07711343498044225, "grad_norm": 0.259067547361112, "learning_rate": 1.9883879892066623e-05, "loss": 0.2892, "step": 966 }, { "epoch": 0.07719326255288576, "grad_norm": 0.1899384873511659, "learning_rate": 1.9883486697813554e-05, "loss": 0.2885, "step": 967 }, { "epoch": 0.07727309012532929, "grad_norm": 0.20233296915536975, "learning_rate": 1.988309284288851e-05, "loss": 0.3203, "step": 968 }, { "epoch": 0.0773529176977728, "grad_norm": 0.226625426365794, "learning_rate": 1.9882698327317823e-05, "loss": 0.2807, "step": 969 }, { "epoch": 0.07743274527021633, "grad_norm": 0.21541232932206275, "learning_rate": 1.988230315112786e-05, "loss": 0.2794, "step": 970 }, { "epoch": 0.07751257284265986, "grad_norm": 0.20953239993206033, "learning_rate": 1.9881907314345042e-05, "loss": 0.2945, "step": 971 }, { "epoch": 0.07759240041510337, "grad_norm": 0.2131750976798585, "learning_rate": 1.9881510816995827e-05, "loss": 0.2635, "step": 972 }, { "epoch": 0.0776722279875469, "grad_norm": 0.21889460577532638, "learning_rate": 1.9881113659106722e-05, "loss": 0.283, "step": 973 }, { "epoch": 0.07775205555999042, "grad_norm": 0.19606639525478334, "learning_rate": 1.9880715840704268e-05, "loss": 0.2814, "step": 974 }, { "epoch": 0.07783188313243394, "grad_norm": 0.18674383043998835, "learning_rate": 1.9880317361815063e-05, "loss": 0.293, "step": 975 }, { "epoch": 0.07791171070487747, "grad_norm": 0.19522238495009447, "learning_rate": 1.9879918222465745e-05, "loss": 0.3273, "step": 976 }, { "epoch": 0.07799153827732098, "grad_norm": 0.23807380097533865, "learning_rate": 1.987951842268299e-05, "loss": 0.2949, "step": 977 }, { "epoch": 0.07807136584976451, "grad_norm": 0.18830687723448727, "learning_rate": 1.9879117962493527e-05, "loss": 0.2595, "step": 978 }, { "epoch": 0.07815119342220803, "grad_norm": 0.19294282977364677, "learning_rate": 1.9878716841924126e-05, "loss": 0.2715, "step": 979 }, { "epoch": 0.07823102099465155, "grad_norm": 0.26612807120162574, "learning_rate": 1.9878315061001595e-05, "loss": 0.2603, "step": 980 }, { "epoch": 0.07831084856709508, "grad_norm": 0.20405089865151144, "learning_rate": 1.98779126197528e-05, "loss": 0.3086, "step": 981 }, { "epoch": 0.0783906761395386, "grad_norm": 0.19613756633084742, "learning_rate": 1.9877509518204632e-05, "loss": 0.2672, "step": 982 }, { "epoch": 0.07847050371198212, "grad_norm": 0.22845482628225763, "learning_rate": 1.9877105756384044e-05, "loss": 0.2701, "step": 983 }, { "epoch": 0.07855033128442564, "grad_norm": 0.21575829412854677, "learning_rate": 1.9876701334318024e-05, "loss": 0.2987, "step": 984 }, { "epoch": 0.07863015885686916, "grad_norm": 0.18145693488439618, "learning_rate": 1.987629625203361e-05, "loss": 0.3135, "step": 985 }, { "epoch": 0.07870998642931268, "grad_norm": 0.2572294588764231, "learning_rate": 1.987589050955787e-05, "loss": 0.2894, "step": 986 }, { "epoch": 0.0787898140017562, "grad_norm": 0.2801159785276153, "learning_rate": 1.9875484106917936e-05, "loss": 0.2984, "step": 987 }, { "epoch": 0.07886964157419973, "grad_norm": 0.22994576602346145, "learning_rate": 1.987507704414097e-05, "loss": 0.2841, "step": 988 }, { "epoch": 0.07894946914664325, "grad_norm": 0.223201128983201, "learning_rate": 1.9874669321254182e-05, "loss": 0.3276, "step": 989 }, { "epoch": 0.07902929671908678, "grad_norm": 0.23104031990148494, "learning_rate": 1.987426093828483e-05, "loss": 0.3163, "step": 990 }, { "epoch": 0.07910912429153029, "grad_norm": 0.22606039620007226, "learning_rate": 1.987385189526021e-05, "loss": 0.2916, "step": 991 }, { "epoch": 0.07918895186397382, "grad_norm": 0.1746842132591328, "learning_rate": 1.987344219220767e-05, "loss": 0.3024, "step": 992 }, { "epoch": 0.07926877943641734, "grad_norm": 0.2173172256701975, "learning_rate": 1.9873031829154588e-05, "loss": 0.2923, "step": 993 }, { "epoch": 0.07934860700886086, "grad_norm": 0.22195771236273165, "learning_rate": 1.98726208061284e-05, "loss": 0.2783, "step": 994 }, { "epoch": 0.07942843458130439, "grad_norm": 0.2204334806635215, "learning_rate": 1.9872209123156582e-05, "loss": 0.2691, "step": 995 }, { "epoch": 0.0795082621537479, "grad_norm": 0.1834759504082246, "learning_rate": 1.9871796780266653e-05, "loss": 0.2831, "step": 996 }, { "epoch": 0.07958808972619143, "grad_norm": 0.19540186282466623, "learning_rate": 1.9871383777486177e-05, "loss": 0.274, "step": 997 }, { "epoch": 0.07966791729863495, "grad_norm": 0.19560880063266187, "learning_rate": 1.9870970114842758e-05, "loss": 0.316, "step": 998 }, { "epoch": 0.07974774487107847, "grad_norm": 0.22131597521645435, "learning_rate": 1.9870555792364054e-05, "loss": 0.2841, "step": 999 }, { "epoch": 0.079827572443522, "grad_norm": 0.19319742214579422, "learning_rate": 1.9870140810077754e-05, "loss": 0.336, "step": 1000 }, { "epoch": 0.07990740001596551, "grad_norm": 0.2012465287567138, "learning_rate": 1.9869725168011603e-05, "loss": 0.284, "step": 1001 }, { "epoch": 0.07998722758840904, "grad_norm": 0.18723714193835433, "learning_rate": 1.9869308866193383e-05, "loss": 0.3385, "step": 1002 }, { "epoch": 0.08006705516085255, "grad_norm": 0.2488797606379662, "learning_rate": 1.9868891904650924e-05, "loss": 0.2469, "step": 1003 }, { "epoch": 0.08014688273329608, "grad_norm": 0.19476957847127507, "learning_rate": 1.9868474283412095e-05, "loss": 0.2537, "step": 1004 }, { "epoch": 0.0802267103057396, "grad_norm": 0.24263843737853322, "learning_rate": 1.9868056002504812e-05, "loss": 0.276, "step": 1005 }, { "epoch": 0.08030653787818312, "grad_norm": 0.23228482645456788, "learning_rate": 1.986763706195704e-05, "loss": 0.2802, "step": 1006 }, { "epoch": 0.08038636545062665, "grad_norm": 0.19722243329808034, "learning_rate": 1.986721746179678e-05, "loss": 0.2776, "step": 1007 }, { "epoch": 0.08046619302307016, "grad_norm": 0.18697395297796016, "learning_rate": 1.9866797202052082e-05, "loss": 0.2787, "step": 1008 }, { "epoch": 0.08054602059551369, "grad_norm": 0.2445040245883315, "learning_rate": 1.9866376282751037e-05, "loss": 0.2988, "step": 1009 }, { "epoch": 0.08062584816795722, "grad_norm": 0.1838084294051106, "learning_rate": 1.986595470392178e-05, "loss": 0.2873, "step": 1010 }, { "epoch": 0.08070567574040073, "grad_norm": 0.1871411993153743, "learning_rate": 1.98655324655925e-05, "loss": 0.2929, "step": 1011 }, { "epoch": 0.08078550331284426, "grad_norm": 0.21427218080085172, "learning_rate": 1.9865109567791414e-05, "loss": 0.3355, "step": 1012 }, { "epoch": 0.08086533088528777, "grad_norm": 0.27576744679609505, "learning_rate": 1.9864686010546795e-05, "loss": 0.2852, "step": 1013 }, { "epoch": 0.0809451584577313, "grad_norm": 0.16275112333537764, "learning_rate": 1.9864261793886955e-05, "loss": 0.2943, "step": 1014 }, { "epoch": 0.08102498603017483, "grad_norm": 0.20091273277391458, "learning_rate": 1.986383691784025e-05, "loss": 0.2665, "step": 1015 }, { "epoch": 0.08110481360261834, "grad_norm": 0.2203500454711935, "learning_rate": 1.986341138243508e-05, "loss": 0.2455, "step": 1016 }, { "epoch": 0.08118464117506187, "grad_norm": 0.19610000086782922, "learning_rate": 1.98629851876999e-05, "loss": 0.3169, "step": 1017 }, { "epoch": 0.08126446874750538, "grad_norm": 0.1998423335964387, "learning_rate": 1.9862558333663185e-05, "loss": 0.3094, "step": 1018 }, { "epoch": 0.08134429631994891, "grad_norm": 0.20411630358471275, "learning_rate": 1.986213082035348e-05, "loss": 0.3269, "step": 1019 }, { "epoch": 0.08142412389239244, "grad_norm": 0.1839065707900187, "learning_rate": 1.986170264779936e-05, "loss": 0.2992, "step": 1020 }, { "epoch": 0.08150395146483595, "grad_norm": 0.19867722382626057, "learning_rate": 1.986127381602944e-05, "loss": 0.2702, "step": 1021 }, { "epoch": 0.08158377903727948, "grad_norm": 0.19580723935107597, "learning_rate": 1.9860844325072392e-05, "loss": 0.2517, "step": 1022 }, { "epoch": 0.081663606609723, "grad_norm": 0.17509936203997634, "learning_rate": 1.9860414174956924e-05, "loss": 0.2725, "step": 1023 }, { "epoch": 0.08174343418216652, "grad_norm": 0.20087688555413527, "learning_rate": 1.985998336571179e-05, "loss": 0.2588, "step": 1024 }, { "epoch": 0.08182326175461004, "grad_norm": 0.21773780973406115, "learning_rate": 1.985955189736579e-05, "loss": 0.2842, "step": 1025 }, { "epoch": 0.08190308932705356, "grad_norm": 0.176379178034207, "learning_rate": 1.9859119769947765e-05, "loss": 0.2971, "step": 1026 }, { "epoch": 0.08198291689949709, "grad_norm": 0.17383775333744186, "learning_rate": 1.9858686983486596e-05, "loss": 0.252, "step": 1027 }, { "epoch": 0.0820627444719406, "grad_norm": 0.2185598488801046, "learning_rate": 1.9858253538011222e-05, "loss": 0.2665, "step": 1028 }, { "epoch": 0.08214257204438413, "grad_norm": 0.18667605764918163, "learning_rate": 1.9857819433550606e-05, "loss": 0.276, "step": 1029 }, { "epoch": 0.08222239961682765, "grad_norm": 0.24186047967548513, "learning_rate": 1.985738467013378e-05, "loss": 0.2626, "step": 1030 }, { "epoch": 0.08230222718927117, "grad_norm": 0.1732556382572157, "learning_rate": 1.9856949247789796e-05, "loss": 0.2538, "step": 1031 }, { "epoch": 0.0823820547617147, "grad_norm": 0.2575110876015586, "learning_rate": 1.9856513166547756e-05, "loss": 0.3008, "step": 1032 }, { "epoch": 0.08246188233415822, "grad_norm": 0.18451310291491424, "learning_rate": 1.9856076426436825e-05, "loss": 0.2944, "step": 1033 }, { "epoch": 0.08254170990660174, "grad_norm": 0.19836403851617326, "learning_rate": 1.985563902748619e-05, "loss": 0.2805, "step": 1034 }, { "epoch": 0.08262153747904526, "grad_norm": 0.18718226692573758, "learning_rate": 1.9855200969725086e-05, "loss": 0.3301, "step": 1035 }, { "epoch": 0.08270136505148878, "grad_norm": 0.20371257091480566, "learning_rate": 1.9854762253182798e-05, "loss": 0.2712, "step": 1036 }, { "epoch": 0.08278119262393231, "grad_norm": 0.19158076264459023, "learning_rate": 1.985432287788865e-05, "loss": 0.3194, "step": 1037 }, { "epoch": 0.08286102019637583, "grad_norm": 0.20880473328412436, "learning_rate": 1.9853882843872024e-05, "loss": 0.3214, "step": 1038 }, { "epoch": 0.08294084776881935, "grad_norm": 0.1767588477264103, "learning_rate": 1.985344215116232e-05, "loss": 0.2767, "step": 1039 }, { "epoch": 0.08302067534126287, "grad_norm": 0.19225567732814783, "learning_rate": 1.9853000799789e-05, "loss": 0.24, "step": 1040 }, { "epoch": 0.0831005029137064, "grad_norm": 0.2895301897310489, "learning_rate": 1.9852558789781576e-05, "loss": 0.2705, "step": 1041 }, { "epoch": 0.08318033048614992, "grad_norm": 0.21545521611325985, "learning_rate": 1.985211612116958e-05, "loss": 0.2616, "step": 1042 }, { "epoch": 0.08326015805859344, "grad_norm": 0.20777811309577757, "learning_rate": 1.9851672793982618e-05, "loss": 0.2546, "step": 1043 }, { "epoch": 0.08333998563103696, "grad_norm": 0.21002902420494757, "learning_rate": 1.985122880825031e-05, "loss": 0.2879, "step": 1044 }, { "epoch": 0.08341981320348048, "grad_norm": 0.221993495205692, "learning_rate": 1.9850784164002346e-05, "loss": 0.2488, "step": 1045 }, { "epoch": 0.083499640775924, "grad_norm": 0.2320967634765134, "learning_rate": 1.9850338861268443e-05, "loss": 0.2846, "step": 1046 }, { "epoch": 0.08357946834836752, "grad_norm": 0.2208909792923983, "learning_rate": 1.9849892900078368e-05, "loss": 0.2637, "step": 1047 }, { "epoch": 0.08365929592081105, "grad_norm": 0.19702134149373157, "learning_rate": 1.9849446280461936e-05, "loss": 0.2734, "step": 1048 }, { "epoch": 0.08373912349325457, "grad_norm": 0.24773370203572442, "learning_rate": 1.9848999002448996e-05, "loss": 0.27, "step": 1049 }, { "epoch": 0.08381895106569809, "grad_norm": 0.1956124172088338, "learning_rate": 1.984855106606945e-05, "loss": 0.2925, "step": 1050 }, { "epoch": 0.08389877863814162, "grad_norm": 0.20875420348721213, "learning_rate": 1.9848102471353238e-05, "loss": 0.2927, "step": 1051 }, { "epoch": 0.08397860621058513, "grad_norm": 0.20385090151913077, "learning_rate": 1.9847653218330353e-05, "loss": 0.2688, "step": 1052 }, { "epoch": 0.08405843378302866, "grad_norm": 0.17814217287970172, "learning_rate": 1.984720330703082e-05, "loss": 0.3427, "step": 1053 }, { "epoch": 0.08413826135547219, "grad_norm": 0.19366896789303073, "learning_rate": 1.9846752737484712e-05, "loss": 0.2711, "step": 1054 }, { "epoch": 0.0842180889279157, "grad_norm": 0.19718652534754497, "learning_rate": 1.9846301509722157e-05, "loss": 0.3283, "step": 1055 }, { "epoch": 0.08429791650035923, "grad_norm": 0.20195442609476078, "learning_rate": 1.9845849623773307e-05, "loss": 0.2854, "step": 1056 }, { "epoch": 0.08437774407280274, "grad_norm": 0.232124412730841, "learning_rate": 1.984539707966838e-05, "loss": 0.2869, "step": 1057 }, { "epoch": 0.08445757164524627, "grad_norm": 0.188161786955155, "learning_rate": 1.9844943877437613e-05, "loss": 0.2534, "step": 1058 }, { "epoch": 0.0845373992176898, "grad_norm": 0.23520418999649195, "learning_rate": 1.9844490017111315e-05, "loss": 0.294, "step": 1059 }, { "epoch": 0.08461722679013331, "grad_norm": 0.19586291501544198, "learning_rate": 1.9844035498719814e-05, "loss": 0.3437, "step": 1060 }, { "epoch": 0.08469705436257684, "grad_norm": 0.2020348706575156, "learning_rate": 1.9843580322293498e-05, "loss": 0.2964, "step": 1061 }, { "epoch": 0.08477688193502035, "grad_norm": 0.2532196656018058, "learning_rate": 1.9843124487862794e-05, "loss": 0.2879, "step": 1062 }, { "epoch": 0.08485670950746388, "grad_norm": 0.24917614042374112, "learning_rate": 1.984266799545817e-05, "loss": 0.228, "step": 1063 }, { "epoch": 0.0849365370799074, "grad_norm": 0.2045437360389878, "learning_rate": 1.984221084511014e-05, "loss": 0.2811, "step": 1064 }, { "epoch": 0.08501636465235092, "grad_norm": 0.18210192335272848, "learning_rate": 1.984175303684927e-05, "loss": 0.3, "step": 1065 }, { "epoch": 0.08509619222479445, "grad_norm": 0.19950502597896214, "learning_rate": 1.9841294570706154e-05, "loss": 0.2762, "step": 1066 }, { "epoch": 0.08517601979723796, "grad_norm": 0.27492494106274745, "learning_rate": 1.9840835446711445e-05, "loss": 0.2646, "step": 1067 }, { "epoch": 0.08525584736968149, "grad_norm": 0.21581563200130124, "learning_rate": 1.9840375664895824e-05, "loss": 0.2877, "step": 1068 }, { "epoch": 0.085335674942125, "grad_norm": 0.3098304185664285, "learning_rate": 1.983991522529004e-05, "loss": 0.2724, "step": 1069 }, { "epoch": 0.08541550251456853, "grad_norm": 0.2272592163601382, "learning_rate": 1.983945412792486e-05, "loss": 0.2966, "step": 1070 }, { "epoch": 0.08549533008701206, "grad_norm": 0.22020944633982947, "learning_rate": 1.9838992372831112e-05, "loss": 0.259, "step": 1071 }, { "epoch": 0.08557515765945557, "grad_norm": 0.29508657664908544, "learning_rate": 1.9838529960039657e-05, "loss": 0.3017, "step": 1072 }, { "epoch": 0.0856549852318991, "grad_norm": 0.21376951093961424, "learning_rate": 1.9838066889581414e-05, "loss": 0.2633, "step": 1073 }, { "epoch": 0.08573481280434261, "grad_norm": 0.20997890926310864, "learning_rate": 1.9837603161487332e-05, "loss": 0.3242, "step": 1074 }, { "epoch": 0.08581464037678614, "grad_norm": 0.27416159858026223, "learning_rate": 1.9837138775788407e-05, "loss": 0.3133, "step": 1075 }, { "epoch": 0.08589446794922967, "grad_norm": 0.2521805647255479, "learning_rate": 1.983667373251569e-05, "loss": 0.2651, "step": 1076 }, { "epoch": 0.08597429552167318, "grad_norm": 0.2503008606881017, "learning_rate": 1.9836208031700256e-05, "loss": 0.2433, "step": 1077 }, { "epoch": 0.08605412309411671, "grad_norm": 0.21748338638074016, "learning_rate": 1.9835741673373245e-05, "loss": 0.2226, "step": 1078 }, { "epoch": 0.08613395066656022, "grad_norm": 0.2569503880596774, "learning_rate": 1.9835274657565828e-05, "loss": 0.214, "step": 1079 }, { "epoch": 0.08621377823900375, "grad_norm": 0.24573926365726356, "learning_rate": 1.9834806984309217e-05, "loss": 0.2508, "step": 1080 }, { "epoch": 0.08629360581144728, "grad_norm": 0.19060018725430553, "learning_rate": 1.9834338653634686e-05, "loss": 0.2795, "step": 1081 }, { "epoch": 0.0863734333838908, "grad_norm": 0.26903837385736473, "learning_rate": 1.983386966557353e-05, "loss": 0.34, "step": 1082 }, { "epoch": 0.08645326095633432, "grad_norm": 0.20601801178825163, "learning_rate": 1.9833400020157106e-05, "loss": 0.2411, "step": 1083 }, { "epoch": 0.08653308852877784, "grad_norm": 0.18192671804761104, "learning_rate": 1.983292971741681e-05, "loss": 0.2747, "step": 1084 }, { "epoch": 0.08661291610122136, "grad_norm": 0.2218798358477803, "learning_rate": 1.9832458757384066e-05, "loss": 0.2803, "step": 1085 }, { "epoch": 0.08669274367366489, "grad_norm": 0.273081989908339, "learning_rate": 1.9831987140090373e-05, "loss": 0.2222, "step": 1086 }, { "epoch": 0.0867725712461084, "grad_norm": 0.1866259210867006, "learning_rate": 1.9831514865567244e-05, "loss": 0.248, "step": 1087 }, { "epoch": 0.08685239881855193, "grad_norm": 0.2848468710594219, "learning_rate": 1.9831041933846254e-05, "loss": 0.3026, "step": 1088 }, { "epoch": 0.08693222639099545, "grad_norm": 0.2319922283527784, "learning_rate": 1.9830568344959018e-05, "loss": 0.3097, "step": 1089 }, { "epoch": 0.08701205396343897, "grad_norm": 0.34084150098925114, "learning_rate": 1.983009409893719e-05, "loss": 0.2701, "step": 1090 }, { "epoch": 0.08709188153588249, "grad_norm": 0.19042404129765295, "learning_rate": 1.982961919581248e-05, "loss": 0.3114, "step": 1091 }, { "epoch": 0.08717170910832601, "grad_norm": 0.18304924562057798, "learning_rate": 1.9829143635616618e-05, "loss": 0.2838, "step": 1092 }, { "epoch": 0.08725153668076954, "grad_norm": 0.24723813101592992, "learning_rate": 1.982866741838141e-05, "loss": 0.2441, "step": 1093 }, { "epoch": 0.08733136425321306, "grad_norm": 0.2728056554709952, "learning_rate": 1.9828190544138675e-05, "loss": 0.2941, "step": 1094 }, { "epoch": 0.08741119182565658, "grad_norm": 0.27833953327576083, "learning_rate": 1.9827713012920297e-05, "loss": 0.2892, "step": 1095 }, { "epoch": 0.0874910193981001, "grad_norm": 0.26711869781105635, "learning_rate": 1.98272348247582e-05, "loss": 0.3456, "step": 1096 }, { "epoch": 0.08757084697054363, "grad_norm": 0.29357150364112244, "learning_rate": 1.9826755979684343e-05, "loss": 0.2888, "step": 1097 }, { "epoch": 0.08765067454298715, "grad_norm": 0.24581877231402005, "learning_rate": 1.982627647773074e-05, "loss": 0.2621, "step": 1098 }, { "epoch": 0.08773050211543067, "grad_norm": 0.231686526603851, "learning_rate": 1.9825796318929438e-05, "loss": 0.2753, "step": 1099 }, { "epoch": 0.0878103296878742, "grad_norm": 0.24137078272970794, "learning_rate": 1.9825315503312536e-05, "loss": 0.3002, "step": 1100 }, { "epoch": 0.08789015726031771, "grad_norm": 0.23775555739187118, "learning_rate": 1.9824834030912176e-05, "loss": 0.3101, "step": 1101 }, { "epoch": 0.08796998483276124, "grad_norm": 0.21096029878548014, "learning_rate": 1.9824351901760544e-05, "loss": 0.3158, "step": 1102 }, { "epoch": 0.08804981240520476, "grad_norm": 0.22646722588771298, "learning_rate": 1.9823869115889866e-05, "loss": 0.2647, "step": 1103 }, { "epoch": 0.08812963997764828, "grad_norm": 0.21653713365172736, "learning_rate": 1.9823385673332415e-05, "loss": 0.2447, "step": 1104 }, { "epoch": 0.0882094675500918, "grad_norm": 0.21212794788438852, "learning_rate": 1.9822901574120506e-05, "loss": 0.2301, "step": 1105 }, { "epoch": 0.08828929512253532, "grad_norm": 0.20646076913008563, "learning_rate": 1.98224168182865e-05, "loss": 0.2685, "step": 1106 }, { "epoch": 0.08836912269497885, "grad_norm": 0.19870392708572324, "learning_rate": 1.9821931405862805e-05, "loss": 0.303, "step": 1107 }, { "epoch": 0.08844895026742237, "grad_norm": 0.2566644625835496, "learning_rate": 1.9821445336881862e-05, "loss": 0.2802, "step": 1108 }, { "epoch": 0.08852877783986589, "grad_norm": 0.21146811758760542, "learning_rate": 1.9820958611376168e-05, "loss": 0.2739, "step": 1109 }, { "epoch": 0.08860860541230942, "grad_norm": 0.18956063030704765, "learning_rate": 1.9820471229378252e-05, "loss": 0.2879, "step": 1110 }, { "epoch": 0.08868843298475293, "grad_norm": 0.20845718454990564, "learning_rate": 1.98199831909207e-05, "loss": 0.2527, "step": 1111 }, { "epoch": 0.08876826055719646, "grad_norm": 0.19779303675003704, "learning_rate": 1.9819494496036137e-05, "loss": 0.2527, "step": 1112 }, { "epoch": 0.08884808812963997, "grad_norm": 0.22570070284954408, "learning_rate": 1.981900514475722e-05, "loss": 0.2906, "step": 1113 }, { "epoch": 0.0889279157020835, "grad_norm": 0.19661035896942655, "learning_rate": 1.9818515137116675e-05, "loss": 0.2609, "step": 1114 }, { "epoch": 0.08900774327452703, "grad_norm": 0.24211496463674087, "learning_rate": 1.9818024473147242e-05, "loss": 0.3303, "step": 1115 }, { "epoch": 0.08908757084697054, "grad_norm": 0.23640594912987598, "learning_rate": 1.981753315288173e-05, "loss": 0.32, "step": 1116 }, { "epoch": 0.08916739841941407, "grad_norm": 0.19473990207578867, "learning_rate": 1.981704117635298e-05, "loss": 0.2837, "step": 1117 }, { "epoch": 0.08924722599185758, "grad_norm": 0.24755025220449584, "learning_rate": 1.9816548543593877e-05, "loss": 0.3308, "step": 1118 }, { "epoch": 0.08932705356430111, "grad_norm": 0.3422899511253733, "learning_rate": 1.981605525463735e-05, "loss": 0.2851, "step": 1119 }, { "epoch": 0.08940688113674464, "grad_norm": 0.2278816852969558, "learning_rate": 1.981556130951638e-05, "loss": 0.2279, "step": 1120 }, { "epoch": 0.08948670870918815, "grad_norm": 0.23715607126386673, "learning_rate": 1.9815066708263976e-05, "loss": 0.2927, "step": 1121 }, { "epoch": 0.08956653628163168, "grad_norm": 0.3549396933524374, "learning_rate": 1.9814571450913208e-05, "loss": 0.3025, "step": 1122 }, { "epoch": 0.08964636385407519, "grad_norm": 0.2362620023719934, "learning_rate": 1.9814075537497176e-05, "loss": 0.2623, "step": 1123 }, { "epoch": 0.08972619142651872, "grad_norm": 0.19865832739693207, "learning_rate": 1.9813578968049035e-05, "loss": 0.2967, "step": 1124 }, { "epoch": 0.08980601899896225, "grad_norm": 0.2521616194676834, "learning_rate": 1.9813081742601977e-05, "loss": 0.2334, "step": 1125 }, { "epoch": 0.08988584657140576, "grad_norm": 0.23687147827804525, "learning_rate": 1.9812583861189237e-05, "loss": 0.2848, "step": 1126 }, { "epoch": 0.08996567414384929, "grad_norm": 0.22650077763398666, "learning_rate": 1.98120853238441e-05, "loss": 0.2962, "step": 1127 }, { "epoch": 0.0900455017162928, "grad_norm": 0.34462439611178497, "learning_rate": 1.981158613059989e-05, "loss": 0.29, "step": 1128 }, { "epoch": 0.09012532928873633, "grad_norm": 0.28367559429946504, "learning_rate": 1.9811086281489973e-05, "loss": 0.2608, "step": 1129 }, { "epoch": 0.09020515686117986, "grad_norm": 0.28980889727945697, "learning_rate": 1.9810585776547767e-05, "loss": 0.2537, "step": 1130 }, { "epoch": 0.09028498443362337, "grad_norm": 0.22912353551673129, "learning_rate": 1.9810084615806727e-05, "loss": 0.2875, "step": 1131 }, { "epoch": 0.0903648120060669, "grad_norm": 0.2675133001602059, "learning_rate": 1.9809582799300352e-05, "loss": 0.2842, "step": 1132 }, { "epoch": 0.09044463957851041, "grad_norm": 0.212570609863037, "learning_rate": 1.9809080327062188e-05, "loss": 0.2956, "step": 1133 }, { "epoch": 0.09052446715095394, "grad_norm": 0.2538746253206107, "learning_rate": 1.980857719912582e-05, "loss": 0.2308, "step": 1134 }, { "epoch": 0.09060429472339745, "grad_norm": 0.19358846187353498, "learning_rate": 1.9808073415524888e-05, "loss": 0.2926, "step": 1135 }, { "epoch": 0.09068412229584098, "grad_norm": 0.2421778543672009, "learning_rate": 1.9807568976293057e-05, "loss": 0.3051, "step": 1136 }, { "epoch": 0.09076394986828451, "grad_norm": 0.2524467056419753, "learning_rate": 1.9807063881464055e-05, "loss": 0.2714, "step": 1137 }, { "epoch": 0.09084377744072802, "grad_norm": 0.35887308528856626, "learning_rate": 1.9806558131071646e-05, "loss": 0.3002, "step": 1138 }, { "epoch": 0.09092360501317155, "grad_norm": 0.19774507502396632, "learning_rate": 1.9806051725149632e-05, "loss": 0.2848, "step": 1139 }, { "epoch": 0.09100343258561507, "grad_norm": 0.26840802967489885, "learning_rate": 1.9805544663731866e-05, "loss": 0.2899, "step": 1140 }, { "epoch": 0.0910832601580586, "grad_norm": 0.1885418617873179, "learning_rate": 1.980503694685225e-05, "loss": 0.2515, "step": 1141 }, { "epoch": 0.09116308773050212, "grad_norm": 0.23177703155400217, "learning_rate": 1.9804528574544708e-05, "loss": 0.3266, "step": 1142 }, { "epoch": 0.09124291530294563, "grad_norm": 0.25112021384603767, "learning_rate": 1.9804019546843235e-05, "loss": 0.2856, "step": 1143 }, { "epoch": 0.09132274287538916, "grad_norm": 0.24686985069124137, "learning_rate": 1.9803509863781857e-05, "loss": 0.2953, "step": 1144 }, { "epoch": 0.09140257044783268, "grad_norm": 0.21262729655589654, "learning_rate": 1.9802999525394636e-05, "loss": 0.2959, "step": 1145 }, { "epoch": 0.0914823980202762, "grad_norm": 0.21686853496415082, "learning_rate": 1.9802488531715696e-05, "loss": 0.2661, "step": 1146 }, { "epoch": 0.09156222559271973, "grad_norm": 0.27036397049135447, "learning_rate": 1.9801976882779187e-05, "loss": 0.2492, "step": 1147 }, { "epoch": 0.09164205316516325, "grad_norm": 0.2189357595731918, "learning_rate": 1.9801464578619318e-05, "loss": 0.2625, "step": 1148 }, { "epoch": 0.09172188073760677, "grad_norm": 0.22335004224555746, "learning_rate": 1.9800951619270326e-05, "loss": 0.2811, "step": 1149 }, { "epoch": 0.09180170831005029, "grad_norm": 0.21629028281088042, "learning_rate": 1.9800438004766507e-05, "loss": 0.2626, "step": 1150 }, { "epoch": 0.09188153588249381, "grad_norm": 0.23249683458259018, "learning_rate": 1.979992373514219e-05, "loss": 0.2735, "step": 1151 }, { "epoch": 0.09196136345493734, "grad_norm": 0.2051687643826065, "learning_rate": 1.9799408810431757e-05, "loss": 0.2525, "step": 1152 }, { "epoch": 0.09204119102738086, "grad_norm": 0.1993050881521077, "learning_rate": 1.979889323066962e-05, "loss": 0.2865, "step": 1153 }, { "epoch": 0.09212101859982438, "grad_norm": 0.20271185497539004, "learning_rate": 1.9798376995890254e-05, "loss": 0.2768, "step": 1154 }, { "epoch": 0.0922008461722679, "grad_norm": 0.24013753192752882, "learning_rate": 1.979786010612816e-05, "loss": 0.2596, "step": 1155 }, { "epoch": 0.09228067374471143, "grad_norm": 0.22223110974285856, "learning_rate": 1.9797342561417895e-05, "loss": 0.2787, "step": 1156 }, { "epoch": 0.09236050131715494, "grad_norm": 0.21190792433572458, "learning_rate": 1.9796824361794046e-05, "loss": 0.2495, "step": 1157 }, { "epoch": 0.09244032888959847, "grad_norm": 0.21316223385028363, "learning_rate": 1.9796305507291264e-05, "loss": 0.2249, "step": 1158 }, { "epoch": 0.092520156462042, "grad_norm": 0.2146937759011567, "learning_rate": 1.9795785997944227e-05, "loss": 0.2428, "step": 1159 }, { "epoch": 0.09259998403448551, "grad_norm": 0.2587205197638052, "learning_rate": 1.9795265833787657e-05, "loss": 0.2584, "step": 1160 }, { "epoch": 0.09267981160692904, "grad_norm": 0.19566538156100902, "learning_rate": 1.9794745014856336e-05, "loss": 0.2472, "step": 1161 }, { "epoch": 0.09275963917937255, "grad_norm": 0.19728168049346737, "learning_rate": 1.9794223541185068e-05, "loss": 0.2425, "step": 1162 }, { "epoch": 0.09283946675181608, "grad_norm": 0.2540820906563324, "learning_rate": 1.979370141280872e-05, "loss": 0.2519, "step": 1163 }, { "epoch": 0.0929192943242596, "grad_norm": 0.18285686541132756, "learning_rate": 1.9793178629762188e-05, "loss": 0.2301, "step": 1164 }, { "epoch": 0.09299912189670312, "grad_norm": 0.2413886568286698, "learning_rate": 1.9792655192080423e-05, "loss": 0.2942, "step": 1165 }, { "epoch": 0.09307894946914665, "grad_norm": 0.19432383942154505, "learning_rate": 1.979213109979841e-05, "loss": 0.2893, "step": 1166 }, { "epoch": 0.09315877704159016, "grad_norm": 0.20845269000708724, "learning_rate": 1.9791606352951184e-05, "loss": 0.2777, "step": 1167 }, { "epoch": 0.09323860461403369, "grad_norm": 0.26655160086558805, "learning_rate": 1.979108095157382e-05, "loss": 0.2365, "step": 1168 }, { "epoch": 0.09331843218647722, "grad_norm": 0.2429681234979235, "learning_rate": 1.979055489570145e-05, "loss": 0.2846, "step": 1169 }, { "epoch": 0.09339825975892073, "grad_norm": 0.19756505585034417, "learning_rate": 1.9790028185369222e-05, "loss": 0.2598, "step": 1170 }, { "epoch": 0.09347808733136426, "grad_norm": 0.2766146139034864, "learning_rate": 1.978950082061236e-05, "loss": 0.278, "step": 1171 }, { "epoch": 0.09355791490380777, "grad_norm": 0.2815283690629161, "learning_rate": 1.9788972801466105e-05, "loss": 0.2569, "step": 1172 }, { "epoch": 0.0936377424762513, "grad_norm": 0.21568186726087984, "learning_rate": 1.978844412796576e-05, "loss": 0.2693, "step": 1173 }, { "epoch": 0.09371757004869483, "grad_norm": 0.2834950640754469, "learning_rate": 1.9787914800146657e-05, "loss": 0.2955, "step": 1174 }, { "epoch": 0.09379739762113834, "grad_norm": 0.19799764816440443, "learning_rate": 1.978738481804419e-05, "loss": 0.231, "step": 1175 }, { "epoch": 0.09387722519358187, "grad_norm": 0.2108775996345867, "learning_rate": 1.978685418169378e-05, "loss": 0.3173, "step": 1176 }, { "epoch": 0.09395705276602538, "grad_norm": 0.21865404797152324, "learning_rate": 1.9786322891130896e-05, "loss": 0.2784, "step": 1177 }, { "epoch": 0.09403688033846891, "grad_norm": 0.3073374770917741, "learning_rate": 1.9785790946391056e-05, "loss": 0.2331, "step": 1178 }, { "epoch": 0.09411670791091242, "grad_norm": 0.2222444658478825, "learning_rate": 1.978525834750982e-05, "loss": 0.2974, "step": 1179 }, { "epoch": 0.09419653548335595, "grad_norm": 0.2182566834726537, "learning_rate": 1.9784725094522784e-05, "loss": 0.3127, "step": 1180 }, { "epoch": 0.09427636305579948, "grad_norm": 0.2982748511790782, "learning_rate": 1.97841911874656e-05, "loss": 0.3462, "step": 1181 }, { "epoch": 0.09435619062824299, "grad_norm": 0.21593575418058716, "learning_rate": 1.9783656626373957e-05, "loss": 0.231, "step": 1182 }, { "epoch": 0.09443601820068652, "grad_norm": 0.20894381914105584, "learning_rate": 1.9783121411283584e-05, "loss": 0.3077, "step": 1183 }, { "epoch": 0.09451584577313003, "grad_norm": 0.25800341396494725, "learning_rate": 1.978258554223026e-05, "loss": 0.2428, "step": 1184 }, { "epoch": 0.09459567334557356, "grad_norm": 0.2178675032945491, "learning_rate": 1.978204901924981e-05, "loss": 0.3261, "step": 1185 }, { "epoch": 0.09467550091801709, "grad_norm": 0.20233324520218057, "learning_rate": 1.9781511842378095e-05, "loss": 0.3154, "step": 1186 }, { "epoch": 0.0947553284904606, "grad_norm": 0.22993994689260427, "learning_rate": 1.978097401165102e-05, "loss": 0.2498, "step": 1187 }, { "epoch": 0.09483515606290413, "grad_norm": 0.23166958930930565, "learning_rate": 1.978043552710454e-05, "loss": 0.2366, "step": 1188 }, { "epoch": 0.09491498363534764, "grad_norm": 0.23071913217523043, "learning_rate": 1.977989638877465e-05, "loss": 0.2682, "step": 1189 }, { "epoch": 0.09499481120779117, "grad_norm": 0.18507866007341353, "learning_rate": 1.977935659669739e-05, "loss": 0.2768, "step": 1190 }, { "epoch": 0.0950746387802347, "grad_norm": 0.22232473753878548, "learning_rate": 1.9778816150908845e-05, "loss": 0.2078, "step": 1191 }, { "epoch": 0.09515446635267821, "grad_norm": 0.303085428435137, "learning_rate": 1.9778275051445134e-05, "loss": 0.2426, "step": 1192 }, { "epoch": 0.09523429392512174, "grad_norm": 0.22979736744894114, "learning_rate": 1.9777733298342435e-05, "loss": 0.2773, "step": 1193 }, { "epoch": 0.09531412149756525, "grad_norm": 0.24615656856845303, "learning_rate": 1.977719089163696e-05, "loss": 0.2551, "step": 1194 }, { "epoch": 0.09539394907000878, "grad_norm": 0.32472948794663187, "learning_rate": 1.977664783136497e-05, "loss": 0.3191, "step": 1195 }, { "epoch": 0.09547377664245231, "grad_norm": 0.3388265703128939, "learning_rate": 1.977610411756276e-05, "loss": 0.2519, "step": 1196 }, { "epoch": 0.09555360421489582, "grad_norm": 0.2499599084165142, "learning_rate": 1.977555975026668e-05, "loss": 0.2806, "step": 1197 }, { "epoch": 0.09563343178733935, "grad_norm": 0.22999683441440874, "learning_rate": 1.977501472951311e-05, "loss": 0.2155, "step": 1198 }, { "epoch": 0.09571325935978287, "grad_norm": 0.25744516062004213, "learning_rate": 1.9774469055338496e-05, "loss": 0.2647, "step": 1199 }, { "epoch": 0.09579308693222639, "grad_norm": 0.21921658952982612, "learning_rate": 1.97739227277793e-05, "loss": 0.3095, "step": 1200 }, { "epoch": 0.0958729145046699, "grad_norm": 0.385391886513724, "learning_rate": 1.9773375746872058e-05, "loss": 0.2506, "step": 1201 }, { "epoch": 0.09595274207711343, "grad_norm": 0.20282880523387284, "learning_rate": 1.9772828112653317e-05, "loss": 0.3147, "step": 1202 }, { "epoch": 0.09603256964955696, "grad_norm": 0.26794807021434397, "learning_rate": 1.9772279825159697e-05, "loss": 0.2406, "step": 1203 }, { "epoch": 0.09611239722200048, "grad_norm": 0.31593045194341096, "learning_rate": 1.9771730884427844e-05, "loss": 0.2743, "step": 1204 }, { "epoch": 0.096192224794444, "grad_norm": 0.2625379949823573, "learning_rate": 1.977118129049445e-05, "loss": 0.244, "step": 1205 }, { "epoch": 0.09627205236688752, "grad_norm": 0.27595301125656563, "learning_rate": 1.9770631043396254e-05, "loss": 0.2816, "step": 1206 }, { "epoch": 0.09635187993933104, "grad_norm": 0.4283349839393684, "learning_rate": 1.9770080143170043e-05, "loss": 0.2977, "step": 1207 }, { "epoch": 0.09643170751177457, "grad_norm": 0.20456285836004776, "learning_rate": 1.9769528589852634e-05, "loss": 0.2518, "step": 1208 }, { "epoch": 0.09651153508421809, "grad_norm": 0.3602224354517552, "learning_rate": 1.9768976383480903e-05, "loss": 0.2356, "step": 1209 }, { "epoch": 0.09659136265666161, "grad_norm": 0.3372308576443673, "learning_rate": 1.976842352409176e-05, "loss": 0.2345, "step": 1210 }, { "epoch": 0.09667119022910513, "grad_norm": 0.21956787781905363, "learning_rate": 1.9767870011722166e-05, "loss": 0.2458, "step": 1211 }, { "epoch": 0.09675101780154866, "grad_norm": 0.2416343362676655, "learning_rate": 1.9767315846409115e-05, "loss": 0.2655, "step": 1212 }, { "epoch": 0.09683084537399218, "grad_norm": 0.26213901078212193, "learning_rate": 1.9766761028189655e-05, "loss": 0.253, "step": 1213 }, { "epoch": 0.0969106729464357, "grad_norm": 0.2726078596072805, "learning_rate": 1.976620555710087e-05, "loss": 0.2831, "step": 1214 }, { "epoch": 0.09699050051887922, "grad_norm": 0.21438863729017638, "learning_rate": 1.976564943317989e-05, "loss": 0.2277, "step": 1215 }, { "epoch": 0.09707032809132274, "grad_norm": 0.25273932756890405, "learning_rate": 1.9765092656463897e-05, "loss": 0.2298, "step": 1216 }, { "epoch": 0.09715015566376627, "grad_norm": 0.30182407255279914, "learning_rate": 1.9764535226990098e-05, "loss": 0.2734, "step": 1217 }, { "epoch": 0.0972299832362098, "grad_norm": 0.19725803553969337, "learning_rate": 1.9763977144795766e-05, "loss": 0.2973, "step": 1218 }, { "epoch": 0.09730981080865331, "grad_norm": 0.21673521569355986, "learning_rate": 1.97634184099182e-05, "loss": 0.2216, "step": 1219 }, { "epoch": 0.09738963838109684, "grad_norm": 0.2604602530912635, "learning_rate": 1.9762859022394753e-05, "loss": 0.246, "step": 1220 }, { "epoch": 0.09746946595354035, "grad_norm": 0.3154656001945269, "learning_rate": 1.9762298982262814e-05, "loss": 0.2089, "step": 1221 }, { "epoch": 0.09754929352598388, "grad_norm": 0.2782806394957799, "learning_rate": 1.9761738289559824e-05, "loss": 0.2574, "step": 1222 }, { "epoch": 0.09762912109842739, "grad_norm": 0.3136837947015929, "learning_rate": 1.976117694432326e-05, "loss": 0.2554, "step": 1223 }, { "epoch": 0.09770894867087092, "grad_norm": 0.22832626700082412, "learning_rate": 1.976061494659065e-05, "loss": 0.2477, "step": 1224 }, { "epoch": 0.09778877624331445, "grad_norm": 0.22474825768884138, "learning_rate": 1.976005229639955e-05, "loss": 0.2262, "step": 1225 }, { "epoch": 0.09786860381575796, "grad_norm": 0.25218014317583903, "learning_rate": 1.9759488993787585e-05, "loss": 0.2412, "step": 1226 }, { "epoch": 0.09794843138820149, "grad_norm": 0.2721841533485058, "learning_rate": 1.97589250387924e-05, "loss": 0.2681, "step": 1227 }, { "epoch": 0.098028258960645, "grad_norm": 0.22189583893672862, "learning_rate": 1.9758360431451696e-05, "loss": 0.2421, "step": 1228 }, { "epoch": 0.09810808653308853, "grad_norm": 0.271071068142769, "learning_rate": 1.9757795171803217e-05, "loss": 0.2401, "step": 1229 }, { "epoch": 0.09818791410553206, "grad_norm": 0.26907947384853936, "learning_rate": 1.975722925988475e-05, "loss": 0.1753, "step": 1230 }, { "epoch": 0.09826774167797557, "grad_norm": 0.2508362855714164, "learning_rate": 1.9756662695734112e-05, "loss": 0.2351, "step": 1231 }, { "epoch": 0.0983475692504191, "grad_norm": 0.2467469137988345, "learning_rate": 1.9756095479389188e-05, "loss": 0.2798, "step": 1232 }, { "epoch": 0.09842739682286261, "grad_norm": 0.256404497461266, "learning_rate": 1.975552761088789e-05, "loss": 0.24, "step": 1233 }, { "epoch": 0.09850722439530614, "grad_norm": 0.22203261587532663, "learning_rate": 1.975495909026818e-05, "loss": 0.2727, "step": 1234 }, { "epoch": 0.09858705196774967, "grad_norm": 0.19279401865979695, "learning_rate": 1.975438991756806e-05, "loss": 0.2665, "step": 1235 }, { "epoch": 0.09866687954019318, "grad_norm": 0.21536246972848017, "learning_rate": 1.975382009282557e-05, "loss": 0.2109, "step": 1236 }, { "epoch": 0.09874670711263671, "grad_norm": 0.24152221838557456, "learning_rate": 1.9753249616078813e-05, "loss": 0.2269, "step": 1237 }, { "epoch": 0.09882653468508022, "grad_norm": 0.2196444531800243, "learning_rate": 1.975267848736591e-05, "loss": 0.2473, "step": 1238 }, { "epoch": 0.09890636225752375, "grad_norm": 0.2618046080232694, "learning_rate": 1.9752106706725053e-05, "loss": 0.3198, "step": 1239 }, { "epoch": 0.09898618982996726, "grad_norm": 0.19546074570110186, "learning_rate": 1.975153427419445e-05, "loss": 0.2757, "step": 1240 }, { "epoch": 0.09906601740241079, "grad_norm": 0.24580522009713096, "learning_rate": 1.9750961189812377e-05, "loss": 0.2663, "step": 1241 }, { "epoch": 0.09914584497485432, "grad_norm": 0.30079858675630716, "learning_rate": 1.9750387453617135e-05, "loss": 0.2593, "step": 1242 }, { "epoch": 0.09922567254729783, "grad_norm": 0.22955641466521431, "learning_rate": 1.9749813065647077e-05, "loss": 0.2934, "step": 1243 }, { "epoch": 0.09930550011974136, "grad_norm": 0.28424579238159503, "learning_rate": 1.9749238025940602e-05, "loss": 0.2348, "step": 1244 }, { "epoch": 0.09938532769218487, "grad_norm": 0.23187871035477667, "learning_rate": 1.9748662334536142e-05, "loss": 0.2471, "step": 1245 }, { "epoch": 0.0994651552646284, "grad_norm": 0.20450453511776812, "learning_rate": 1.974808599147219e-05, "loss": 0.3417, "step": 1246 }, { "epoch": 0.09954498283707193, "grad_norm": 0.21119794453718654, "learning_rate": 1.9747508996787267e-05, "loss": 0.2734, "step": 1247 }, { "epoch": 0.09962481040951544, "grad_norm": 0.2448899983050294, "learning_rate": 1.9746931350519937e-05, "loss": 0.2669, "step": 1248 }, { "epoch": 0.09970463798195897, "grad_norm": 0.24889337563483258, "learning_rate": 1.9746353052708823e-05, "loss": 0.2289, "step": 1249 }, { "epoch": 0.09978446555440248, "grad_norm": 0.24185596542238538, "learning_rate": 1.9745774103392577e-05, "loss": 0.2653, "step": 1250 }, { "epoch": 0.09986429312684601, "grad_norm": 0.2359703196478843, "learning_rate": 1.9745194502609902e-05, "loss": 0.2657, "step": 1251 }, { "epoch": 0.09994412069928954, "grad_norm": 0.23072277648587297, "learning_rate": 1.974461425039954e-05, "loss": 0.2923, "step": 1252 }, { "epoch": 0.10002394827173305, "grad_norm": 0.21506233138341507, "learning_rate": 1.9744033346800276e-05, "loss": 0.2645, "step": 1253 }, { "epoch": 0.10010377584417658, "grad_norm": 0.27530546633210573, "learning_rate": 1.9743451791850944e-05, "loss": 0.2722, "step": 1254 }, { "epoch": 0.1001836034166201, "grad_norm": 0.23896299634600354, "learning_rate": 1.974286958559042e-05, "loss": 0.2436, "step": 1255 }, { "epoch": 0.10026343098906362, "grad_norm": 0.2238273827289412, "learning_rate": 1.9742286728057623e-05, "loss": 0.2301, "step": 1256 }, { "epoch": 0.10034325856150715, "grad_norm": 0.2741139346719743, "learning_rate": 1.974170321929151e-05, "loss": 0.2493, "step": 1257 }, { "epoch": 0.10042308613395066, "grad_norm": 0.2953119891324947, "learning_rate": 1.974111905933109e-05, "loss": 0.2451, "step": 1258 }, { "epoch": 0.10050291370639419, "grad_norm": 0.23389169826342698, "learning_rate": 1.9740534248215407e-05, "loss": 0.231, "step": 1259 }, { "epoch": 0.1005827412788377, "grad_norm": 0.30626122331592587, "learning_rate": 1.973994878598356e-05, "loss": 0.2613, "step": 1260 }, { "epoch": 0.10066256885128123, "grad_norm": 0.3137864195004622, "learning_rate": 1.9739362672674683e-05, "loss": 0.2577, "step": 1261 }, { "epoch": 0.10074239642372475, "grad_norm": 0.2174744917141328, "learning_rate": 1.9738775908327958e-05, "loss": 0.2762, "step": 1262 }, { "epoch": 0.10082222399616828, "grad_norm": 0.23261752798657848, "learning_rate": 1.9738188492982595e-05, "loss": 0.2004, "step": 1263 }, { "epoch": 0.1009020515686118, "grad_norm": 0.2774491472627652, "learning_rate": 1.9737600426677874e-05, "loss": 0.2681, "step": 1264 }, { "epoch": 0.10098187914105532, "grad_norm": 0.2536058315301513, "learning_rate": 1.97370117094531e-05, "loss": 0.3179, "step": 1265 }, { "epoch": 0.10106170671349884, "grad_norm": 0.24105854524619966, "learning_rate": 1.9736422341347627e-05, "loss": 0.2911, "step": 1266 }, { "epoch": 0.10114153428594236, "grad_norm": 0.2512086785259622, "learning_rate": 1.973583232240085e-05, "loss": 0.2527, "step": 1267 }, { "epoch": 0.10122136185838589, "grad_norm": 0.2822768979928521, "learning_rate": 1.973524165265221e-05, "loss": 0.2816, "step": 1268 }, { "epoch": 0.10130118943082941, "grad_norm": 0.248383791542131, "learning_rate": 1.9734650332141198e-05, "loss": 0.2754, "step": 1269 }, { "epoch": 0.10138101700327293, "grad_norm": 0.21585325972232117, "learning_rate": 1.973405836090733e-05, "loss": 0.2424, "step": 1270 }, { "epoch": 0.10146084457571646, "grad_norm": 0.2705872496813525, "learning_rate": 1.9733465738990182e-05, "loss": 0.2974, "step": 1271 }, { "epoch": 0.10154067214815997, "grad_norm": 0.255396111013037, "learning_rate": 1.973287246642937e-05, "loss": 0.2778, "step": 1272 }, { "epoch": 0.1016204997206035, "grad_norm": 0.24111481792546538, "learning_rate": 1.973227854326455e-05, "loss": 0.3062, "step": 1273 }, { "epoch": 0.10170032729304702, "grad_norm": 0.22755502230671035, "learning_rate": 1.973168396953543e-05, "loss": 0.1843, "step": 1274 }, { "epoch": 0.10178015486549054, "grad_norm": 0.3133179968999531, "learning_rate": 1.9731088745281744e-05, "loss": 0.316, "step": 1275 }, { "epoch": 0.10185998243793407, "grad_norm": 0.267789379491085, "learning_rate": 1.9730492870543284e-05, "loss": 0.265, "step": 1276 }, { "epoch": 0.10193981001037758, "grad_norm": 0.23061734770748002, "learning_rate": 1.9729896345359884e-05, "loss": 0.2985, "step": 1277 }, { "epoch": 0.10201963758282111, "grad_norm": 0.2816271673983704, "learning_rate": 1.972929916977142e-05, "loss": 0.2358, "step": 1278 }, { "epoch": 0.10209946515526463, "grad_norm": 0.28173744849298316, "learning_rate": 1.9728701343817808e-05, "loss": 0.2673, "step": 1279 }, { "epoch": 0.10217929272770815, "grad_norm": 0.2830986156744412, "learning_rate": 1.9728102867539012e-05, "loss": 0.2415, "step": 1280 }, { "epoch": 0.10225912030015168, "grad_norm": 0.25215942585530965, "learning_rate": 1.972750374097504e-05, "loss": 0.2963, "step": 1281 }, { "epoch": 0.10233894787259519, "grad_norm": 0.2700418139800745, "learning_rate": 1.972690396416594e-05, "loss": 0.233, "step": 1282 }, { "epoch": 0.10241877544503872, "grad_norm": 0.2574467940066687, "learning_rate": 1.9726303537151798e-05, "loss": 0.2281, "step": 1283 }, { "epoch": 0.10249860301748223, "grad_norm": 0.2711474770378358, "learning_rate": 1.9725702459972757e-05, "loss": 0.2606, "step": 1284 }, { "epoch": 0.10257843058992576, "grad_norm": 0.27091516769041013, "learning_rate": 1.9725100732668997e-05, "loss": 0.2259, "step": 1285 }, { "epoch": 0.10265825816236929, "grad_norm": 0.1985444589476402, "learning_rate": 1.9724498355280738e-05, "loss": 0.2283, "step": 1286 }, { "epoch": 0.1027380857348128, "grad_norm": 0.24358355831909606, "learning_rate": 1.972389532784825e-05, "loss": 0.2318, "step": 1287 }, { "epoch": 0.10281791330725633, "grad_norm": 0.2598573404448434, "learning_rate": 1.972329165041184e-05, "loss": 0.2261, "step": 1288 }, { "epoch": 0.10289774087969984, "grad_norm": 0.28247796534064623, "learning_rate": 1.9722687323011862e-05, "loss": 0.2283, "step": 1289 }, { "epoch": 0.10297756845214337, "grad_norm": 0.20408046694680698, "learning_rate": 1.9722082345688714e-05, "loss": 0.2945, "step": 1290 }, { "epoch": 0.1030573960245869, "grad_norm": 0.36092323708974583, "learning_rate": 1.9721476718482835e-05, "loss": 0.2312, "step": 1291 }, { "epoch": 0.10313722359703041, "grad_norm": 0.26213516856680197, "learning_rate": 1.9720870441434708e-05, "loss": 0.2182, "step": 1292 }, { "epoch": 0.10321705116947394, "grad_norm": 0.24029708472543376, "learning_rate": 1.9720263514584862e-05, "loss": 0.2616, "step": 1293 }, { "epoch": 0.10329687874191745, "grad_norm": 0.46023265786342493, "learning_rate": 1.971965593797387e-05, "loss": 0.2776, "step": 1294 }, { "epoch": 0.10337670631436098, "grad_norm": 0.22709775379212588, "learning_rate": 1.9719047711642338e-05, "loss": 0.2364, "step": 1295 }, { "epoch": 0.10345653388680451, "grad_norm": 0.24366834864371237, "learning_rate": 1.9718438835630933e-05, "loss": 0.2726, "step": 1296 }, { "epoch": 0.10353636145924802, "grad_norm": 0.3209501403644205, "learning_rate": 1.971782930998035e-05, "loss": 0.285, "step": 1297 }, { "epoch": 0.10361618903169155, "grad_norm": 0.20992574980178957, "learning_rate": 1.9717219134731338e-05, "loss": 0.2285, "step": 1298 }, { "epoch": 0.10369601660413506, "grad_norm": 0.20570314927323555, "learning_rate": 1.971660830992468e-05, "loss": 0.2214, "step": 1299 }, { "epoch": 0.10377584417657859, "grad_norm": 0.22058977388148834, "learning_rate": 1.971599683560121e-05, "loss": 0.2908, "step": 1300 }, { "epoch": 0.10385567174902212, "grad_norm": 0.2565332833739005, "learning_rate": 1.97153847118018e-05, "loss": 0.2265, "step": 1301 }, { "epoch": 0.10393549932146563, "grad_norm": 0.30598581438009337, "learning_rate": 1.9714771938567373e-05, "loss": 0.2438, "step": 1302 }, { "epoch": 0.10401532689390916, "grad_norm": 0.22543529063818793, "learning_rate": 1.9714158515938885e-05, "loss": 0.2587, "step": 1303 }, { "epoch": 0.10409515446635267, "grad_norm": 0.28301886860588926, "learning_rate": 1.971354444395734e-05, "loss": 0.299, "step": 1304 }, { "epoch": 0.1041749820387962, "grad_norm": 0.28464171947769423, "learning_rate": 1.9712929722663796e-05, "loss": 0.2784, "step": 1305 }, { "epoch": 0.10425480961123972, "grad_norm": 0.2012284425025773, "learning_rate": 1.9712314352099334e-05, "loss": 0.2944, "step": 1306 }, { "epoch": 0.10433463718368324, "grad_norm": 0.2763879370692517, "learning_rate": 1.9711698332305094e-05, "loss": 0.2988, "step": 1307 }, { "epoch": 0.10441446475612677, "grad_norm": 0.2790066340885343, "learning_rate": 1.9711081663322255e-05, "loss": 0.2064, "step": 1308 }, { "epoch": 0.10449429232857028, "grad_norm": 0.23395443290889537, "learning_rate": 1.9710464345192037e-05, "loss": 0.239, "step": 1309 }, { "epoch": 0.10457411990101381, "grad_norm": 0.26353457477786946, "learning_rate": 1.9709846377955704e-05, "loss": 0.2534, "step": 1310 }, { "epoch": 0.10465394747345733, "grad_norm": 0.3068070648257504, "learning_rate": 1.970922776165457e-05, "loss": 0.2625, "step": 1311 }, { "epoch": 0.10473377504590085, "grad_norm": 0.2670518792481485, "learning_rate": 1.9708608496329985e-05, "loss": 0.2552, "step": 1312 }, { "epoch": 0.10481360261834438, "grad_norm": 0.21783622427196347, "learning_rate": 1.9707988582023337e-05, "loss": 0.2184, "step": 1313 }, { "epoch": 0.1048934301907879, "grad_norm": 0.34762545825474683, "learning_rate": 1.9707368018776077e-05, "loss": 0.2575, "step": 1314 }, { "epoch": 0.10497325776323142, "grad_norm": 0.27521497857561616, "learning_rate": 1.9706746806629677e-05, "loss": 0.2488, "step": 1315 }, { "epoch": 0.10505308533567494, "grad_norm": 0.20919552651990925, "learning_rate": 1.9706124945625668e-05, "loss": 0.2436, "step": 1316 }, { "epoch": 0.10513291290811846, "grad_norm": 0.28540799082688806, "learning_rate": 1.9705502435805616e-05, "loss": 0.2809, "step": 1317 }, { "epoch": 0.10521274048056199, "grad_norm": 0.2503272801942166, "learning_rate": 1.9704879277211137e-05, "loss": 0.2864, "step": 1318 }, { "epoch": 0.1052925680530055, "grad_norm": 0.24680835816266694, "learning_rate": 1.9704255469883884e-05, "loss": 0.2351, "step": 1319 }, { "epoch": 0.10537239562544903, "grad_norm": 0.248907015087595, "learning_rate": 1.9703631013865557e-05, "loss": 0.231, "step": 1320 }, { "epoch": 0.10545222319789255, "grad_norm": 0.23131857926750204, "learning_rate": 1.97030059091979e-05, "loss": 0.2313, "step": 1321 }, { "epoch": 0.10553205077033607, "grad_norm": 0.23664096872343768, "learning_rate": 1.9702380155922694e-05, "loss": 0.2722, "step": 1322 }, { "epoch": 0.1056118783427796, "grad_norm": 0.24253584870889613, "learning_rate": 1.970175375408177e-05, "loss": 0.271, "step": 1323 }, { "epoch": 0.10569170591522312, "grad_norm": 0.2521902486299176, "learning_rate": 1.9701126703717003e-05, "loss": 0.2536, "step": 1324 }, { "epoch": 0.10577153348766664, "grad_norm": 0.2362623203433632, "learning_rate": 1.9700499004870307e-05, "loss": 0.2223, "step": 1325 }, { "epoch": 0.10585136106011016, "grad_norm": 0.23111282435328265, "learning_rate": 1.9699870657583643e-05, "loss": 0.2589, "step": 1326 }, { "epoch": 0.10593118863255369, "grad_norm": 0.2387612379037137, "learning_rate": 1.9699241661899014e-05, "loss": 0.2492, "step": 1327 }, { "epoch": 0.1060110162049972, "grad_norm": 0.22931055789357443, "learning_rate": 1.9698612017858457e-05, "loss": 0.2359, "step": 1328 }, { "epoch": 0.10609084377744073, "grad_norm": 0.34949340410282237, "learning_rate": 1.9697981725504073e-05, "loss": 0.2521, "step": 1329 }, { "epoch": 0.10617067134988425, "grad_norm": 0.23800235198500325, "learning_rate": 1.969735078487799e-05, "loss": 0.2526, "step": 1330 }, { "epoch": 0.10625049892232777, "grad_norm": 0.24776137969155806, "learning_rate": 1.9696719196022383e-05, "loss": 0.2295, "step": 1331 }, { "epoch": 0.1063303264947713, "grad_norm": 0.32926566354261116, "learning_rate": 1.969608695897947e-05, "loss": 0.3094, "step": 1332 }, { "epoch": 0.10641015406721481, "grad_norm": 0.2851868557170544, "learning_rate": 1.9695454073791517e-05, "loss": 0.2357, "step": 1333 }, { "epoch": 0.10648998163965834, "grad_norm": 0.2620039759189659, "learning_rate": 1.969482054050083e-05, "loss": 0.2688, "step": 1334 }, { "epoch": 0.10656980921210187, "grad_norm": 0.22009292273657638, "learning_rate": 1.9694186359149752e-05, "loss": 0.2321, "step": 1335 }, { "epoch": 0.10664963678454538, "grad_norm": 0.27715376775473766, "learning_rate": 1.969355152978068e-05, "loss": 0.2975, "step": 1336 }, { "epoch": 0.1067294643569889, "grad_norm": 0.28443960022885323, "learning_rate": 1.9692916052436052e-05, "loss": 0.2637, "step": 1337 }, { "epoch": 0.10680929192943242, "grad_norm": 0.20847432286071885, "learning_rate": 1.9692279927158344e-05, "loss": 0.3212, "step": 1338 }, { "epoch": 0.10688911950187595, "grad_norm": 0.25016317431279156, "learning_rate": 1.9691643153990082e-05, "loss": 0.2877, "step": 1339 }, { "epoch": 0.10696894707431948, "grad_norm": 0.37574113974233914, "learning_rate": 1.9691005732973824e-05, "loss": 0.2016, "step": 1340 }, { "epoch": 0.10704877464676299, "grad_norm": 0.24728151356762254, "learning_rate": 1.9690367664152188e-05, "loss": 0.2632, "step": 1341 }, { "epoch": 0.10712860221920652, "grad_norm": 0.28910996395624994, "learning_rate": 1.9689728947567822e-05, "loss": 0.3028, "step": 1342 }, { "epoch": 0.10720842979165003, "grad_norm": 0.29978195956518594, "learning_rate": 1.9689089583263422e-05, "loss": 0.2299, "step": 1343 }, { "epoch": 0.10728825736409356, "grad_norm": 0.26252739695996985, "learning_rate": 1.9688449571281727e-05, "loss": 0.2328, "step": 1344 }, { "epoch": 0.10736808493653709, "grad_norm": 0.24187898655822684, "learning_rate": 1.968780891166552e-05, "loss": 0.2451, "step": 1345 }, { "epoch": 0.1074479125089806, "grad_norm": 0.23518608750056652, "learning_rate": 1.9687167604457622e-05, "loss": 0.1995, "step": 1346 }, { "epoch": 0.10752774008142413, "grad_norm": 0.2410772007867605, "learning_rate": 1.968652564970091e-05, "loss": 0.2433, "step": 1347 }, { "epoch": 0.10760756765386764, "grad_norm": 0.29815774329408157, "learning_rate": 1.968588304743829e-05, "loss": 0.28, "step": 1348 }, { "epoch": 0.10768739522631117, "grad_norm": 0.25262312263832387, "learning_rate": 1.9685239797712722e-05, "loss": 0.2251, "step": 1349 }, { "epoch": 0.10776722279875468, "grad_norm": 0.2798414158669231, "learning_rate": 1.96845959005672e-05, "loss": 0.2497, "step": 1350 }, { "epoch": 0.10784705037119821, "grad_norm": 0.29559324128966713, "learning_rate": 1.968395135604477e-05, "loss": 0.2429, "step": 1351 }, { "epoch": 0.10792687794364174, "grad_norm": 0.2658043375369154, "learning_rate": 1.9683306164188512e-05, "loss": 0.2484, "step": 1352 }, { "epoch": 0.10800670551608525, "grad_norm": 0.22792457724377968, "learning_rate": 1.968266032504156e-05, "loss": 0.2665, "step": 1353 }, { "epoch": 0.10808653308852878, "grad_norm": 0.28918174717586403, "learning_rate": 1.9682013838647082e-05, "loss": 0.2403, "step": 1354 }, { "epoch": 0.1081663606609723, "grad_norm": 0.2922340282486019, "learning_rate": 1.96813667050483e-05, "loss": 0.2543, "step": 1355 }, { "epoch": 0.10824618823341582, "grad_norm": 0.24292596028314098, "learning_rate": 1.9680718924288463e-05, "loss": 0.2572, "step": 1356 }, { "epoch": 0.10832601580585935, "grad_norm": 0.26344460296168704, "learning_rate": 1.9680070496410875e-05, "loss": 0.2355, "step": 1357 }, { "epoch": 0.10840584337830286, "grad_norm": 0.2923144523513973, "learning_rate": 1.9679421421458886e-05, "loss": 0.2416, "step": 1358 }, { "epoch": 0.10848567095074639, "grad_norm": 0.2245611660736429, "learning_rate": 1.9678771699475877e-05, "loss": 0.2273, "step": 1359 }, { "epoch": 0.1085654985231899, "grad_norm": 0.33415311340039966, "learning_rate": 1.9678121330505285e-05, "loss": 0.2316, "step": 1360 }, { "epoch": 0.10864532609563343, "grad_norm": 0.29904358436673417, "learning_rate": 1.967747031459058e-05, "loss": 0.221, "step": 1361 }, { "epoch": 0.10872515366807696, "grad_norm": 0.24358946940945422, "learning_rate": 1.9676818651775285e-05, "loss": 0.2223, "step": 1362 }, { "epoch": 0.10880498124052047, "grad_norm": 0.34473626764285087, "learning_rate": 1.967616634210296e-05, "loss": 0.2225, "step": 1363 }, { "epoch": 0.108884808812964, "grad_norm": 0.27348522939722053, "learning_rate": 1.9675513385617205e-05, "loss": 0.2567, "step": 1364 }, { "epoch": 0.10896463638540752, "grad_norm": 0.25047119278672203, "learning_rate": 1.967485978236167e-05, "loss": 0.2375, "step": 1365 }, { "epoch": 0.10904446395785104, "grad_norm": 0.30635853094845233, "learning_rate": 1.9674205532380047e-05, "loss": 0.2772, "step": 1366 }, { "epoch": 0.10912429153029457, "grad_norm": 0.2579949356593697, "learning_rate": 1.967355063571607e-05, "loss": 0.2315, "step": 1367 }, { "epoch": 0.10920411910273808, "grad_norm": 0.22211364289421934, "learning_rate": 1.9672895092413516e-05, "loss": 0.2988, "step": 1368 }, { "epoch": 0.10928394667518161, "grad_norm": 0.3158459660972098, "learning_rate": 1.9672238902516202e-05, "loss": 0.2534, "step": 1369 }, { "epoch": 0.10936377424762513, "grad_norm": 0.25438817807505926, "learning_rate": 1.9671582066067995e-05, "loss": 0.2285, "step": 1370 }, { "epoch": 0.10944360182006865, "grad_norm": 0.210760837302758, "learning_rate": 1.9670924583112803e-05, "loss": 0.2338, "step": 1371 }, { "epoch": 0.10952342939251217, "grad_norm": 0.2277910561262445, "learning_rate": 1.9670266453694574e-05, "loss": 0.313, "step": 1372 }, { "epoch": 0.1096032569649557, "grad_norm": 0.25582819139010354, "learning_rate": 1.96696076778573e-05, "loss": 0.2515, "step": 1373 }, { "epoch": 0.10968308453739922, "grad_norm": 0.23190580731280155, "learning_rate": 1.966894825564502e-05, "loss": 0.2261, "step": 1374 }, { "epoch": 0.10976291210984274, "grad_norm": 0.23455046594934242, "learning_rate": 1.9668288187101816e-05, "loss": 0.2751, "step": 1375 }, { "epoch": 0.10984273968228626, "grad_norm": 0.250400707140244, "learning_rate": 1.9667627472271804e-05, "loss": 0.1981, "step": 1376 }, { "epoch": 0.10992256725472978, "grad_norm": 0.24434229759723688, "learning_rate": 1.966696611119916e-05, "loss": 0.2337, "step": 1377 }, { "epoch": 0.1100023948271733, "grad_norm": 0.24356295372988368, "learning_rate": 1.9666304103928082e-05, "loss": 0.2341, "step": 1378 }, { "epoch": 0.11008222239961683, "grad_norm": 0.2539123508558787, "learning_rate": 1.966564145050283e-05, "loss": 0.2746, "step": 1379 }, { "epoch": 0.11016204997206035, "grad_norm": 0.3427629730224734, "learning_rate": 1.96649781509677e-05, "loss": 0.2088, "step": 1380 }, { "epoch": 0.11024187754450387, "grad_norm": 0.26576341536439785, "learning_rate": 1.9664314205367027e-05, "loss": 0.2733, "step": 1381 }, { "epoch": 0.11032170511694739, "grad_norm": 0.23593066841101837, "learning_rate": 1.9663649613745196e-05, "loss": 0.2146, "step": 1382 }, { "epoch": 0.11040153268939092, "grad_norm": 0.38322886484798224, "learning_rate": 1.966298437614663e-05, "loss": 0.2415, "step": 1383 }, { "epoch": 0.11048136026183444, "grad_norm": 0.23985982692003738, "learning_rate": 1.9662318492615804e-05, "loss": 0.2643, "step": 1384 }, { "epoch": 0.11056118783427796, "grad_norm": 0.2224322812744934, "learning_rate": 1.9661651963197222e-05, "loss": 0.242, "step": 1385 }, { "epoch": 0.11064101540672149, "grad_norm": 0.34841651719681554, "learning_rate": 1.9660984787935438e-05, "loss": 0.2096, "step": 1386 }, { "epoch": 0.110720842979165, "grad_norm": 0.24470455258307067, "learning_rate": 1.9660316966875057e-05, "loss": 0.2768, "step": 1387 }, { "epoch": 0.11080067055160853, "grad_norm": 0.32519062704647816, "learning_rate": 1.9659648500060715e-05, "loss": 0.2773, "step": 1388 }, { "epoch": 0.11088049812405205, "grad_norm": 0.267582051825145, "learning_rate": 1.96589793875371e-05, "loss": 0.2173, "step": 1389 }, { "epoch": 0.11096032569649557, "grad_norm": 0.34875082884179426, "learning_rate": 1.9658309629348935e-05, "loss": 0.2648, "step": 1390 }, { "epoch": 0.1110401532689391, "grad_norm": 0.2991312187423114, "learning_rate": 1.9657639225540994e-05, "loss": 0.2157, "step": 1391 }, { "epoch": 0.11111998084138261, "grad_norm": 0.29075306761972786, "learning_rate": 1.9656968176158093e-05, "loss": 0.2557, "step": 1392 }, { "epoch": 0.11119980841382614, "grad_norm": 0.34274138370319945, "learning_rate": 1.9656296481245082e-05, "loss": 0.2235, "step": 1393 }, { "epoch": 0.11127963598626965, "grad_norm": 0.26744470200108206, "learning_rate": 1.965562414084687e-05, "loss": 0.2887, "step": 1394 }, { "epoch": 0.11135946355871318, "grad_norm": 0.3105928330001758, "learning_rate": 1.965495115500839e-05, "loss": 0.2353, "step": 1395 }, { "epoch": 0.1114392911311567, "grad_norm": 0.28421115703598937, "learning_rate": 1.9654277523774637e-05, "loss": 0.2148, "step": 1396 }, { "epoch": 0.11151911870360022, "grad_norm": 0.4055952620157112, "learning_rate": 1.9653603247190637e-05, "loss": 0.2009, "step": 1397 }, { "epoch": 0.11159894627604375, "grad_norm": 0.25625914646160747, "learning_rate": 1.9652928325301465e-05, "loss": 0.212, "step": 1398 }, { "epoch": 0.11167877384848726, "grad_norm": 0.22112887878997076, "learning_rate": 1.9652252758152235e-05, "loss": 0.2449, "step": 1399 }, { "epoch": 0.11175860142093079, "grad_norm": 0.2890190011796242, "learning_rate": 1.9651576545788103e-05, "loss": 0.2843, "step": 1400 }, { "epoch": 0.11183842899337432, "grad_norm": 0.2912176540854078, "learning_rate": 1.9650899688254276e-05, "loss": 0.1794, "step": 1401 }, { "epoch": 0.11191825656581783, "grad_norm": 0.20768511951181073, "learning_rate": 1.9650222185596e-05, "loss": 0.3036, "step": 1402 }, { "epoch": 0.11199808413826136, "grad_norm": 0.23087704931728675, "learning_rate": 1.9649544037858557e-05, "loss": 0.2008, "step": 1403 }, { "epoch": 0.11207791171070487, "grad_norm": 0.2898101299118869, "learning_rate": 1.9648865245087284e-05, "loss": 0.2604, "step": 1404 }, { "epoch": 0.1121577392831484, "grad_norm": 0.2620398449760165, "learning_rate": 1.9648185807327554e-05, "loss": 0.2278, "step": 1405 }, { "epoch": 0.11223756685559193, "grad_norm": 0.2219027515231895, "learning_rate": 1.9647505724624785e-05, "loss": 0.2928, "step": 1406 }, { "epoch": 0.11231739442803544, "grad_norm": 0.2337854516700307, "learning_rate": 1.964682499702444e-05, "loss": 0.2383, "step": 1407 }, { "epoch": 0.11239722200047897, "grad_norm": 0.2651524643291356, "learning_rate": 1.9646143624572015e-05, "loss": 0.3066, "step": 1408 }, { "epoch": 0.11247704957292248, "grad_norm": 0.25709368187792625, "learning_rate": 1.9645461607313065e-05, "loss": 0.2588, "step": 1409 }, { "epoch": 0.11255687714536601, "grad_norm": 0.2440088010351633, "learning_rate": 1.964477894529318e-05, "loss": 0.2312, "step": 1410 }, { "epoch": 0.11263670471780954, "grad_norm": 0.259744605113935, "learning_rate": 1.964409563855799e-05, "loss": 0.2351, "step": 1411 }, { "epoch": 0.11271653229025305, "grad_norm": 0.25488796970975947, "learning_rate": 1.964341168715317e-05, "loss": 0.2285, "step": 1412 }, { "epoch": 0.11279635986269658, "grad_norm": 0.26478830418203614, "learning_rate": 1.9642727091124443e-05, "loss": 0.2757, "step": 1413 }, { "epoch": 0.1128761874351401, "grad_norm": 0.2683884946850726, "learning_rate": 1.964204185051757e-05, "loss": 0.2752, "step": 1414 }, { "epoch": 0.11295601500758362, "grad_norm": 0.24642943634010744, "learning_rate": 1.964135596537836e-05, "loss": 0.2533, "step": 1415 }, { "epoch": 0.11303584258002713, "grad_norm": 0.27606171265933416, "learning_rate": 1.9640669435752656e-05, "loss": 0.239, "step": 1416 }, { "epoch": 0.11311567015247066, "grad_norm": 0.3091738598990311, "learning_rate": 1.9639982261686352e-05, "loss": 0.2781, "step": 1417 }, { "epoch": 0.11319549772491419, "grad_norm": 0.22919063122958944, "learning_rate": 1.9639294443225382e-05, "loss": 0.2647, "step": 1418 }, { "epoch": 0.1132753252973577, "grad_norm": 0.20791122696825004, "learning_rate": 1.963860598041573e-05, "loss": 0.25, "step": 1419 }, { "epoch": 0.11335515286980123, "grad_norm": 0.25029038014991345, "learning_rate": 1.9637916873303407e-05, "loss": 0.2324, "step": 1420 }, { "epoch": 0.11343498044224475, "grad_norm": 0.2785456198582712, "learning_rate": 1.9637227121934485e-05, "loss": 0.2714, "step": 1421 }, { "epoch": 0.11351480801468827, "grad_norm": 0.2491044275400209, "learning_rate": 1.9636536726355068e-05, "loss": 0.2483, "step": 1422 }, { "epoch": 0.1135946355871318, "grad_norm": 0.24236126637351907, "learning_rate": 1.9635845686611306e-05, "loss": 0.298, "step": 1423 }, { "epoch": 0.11367446315957531, "grad_norm": 0.2826832007497869, "learning_rate": 1.9635154002749393e-05, "loss": 0.1983, "step": 1424 }, { "epoch": 0.11375429073201884, "grad_norm": 0.2489193821409029, "learning_rate": 1.9634461674815567e-05, "loss": 0.2249, "step": 1425 }, { "epoch": 0.11383411830446236, "grad_norm": 0.27963189550148504, "learning_rate": 1.9633768702856106e-05, "loss": 0.2975, "step": 1426 }, { "epoch": 0.11391394587690588, "grad_norm": 0.27729616493922987, "learning_rate": 1.963307508691733e-05, "loss": 0.2498, "step": 1427 }, { "epoch": 0.11399377344934941, "grad_norm": 0.2510880585903587, "learning_rate": 1.9632380827045606e-05, "loss": 0.225, "step": 1428 }, { "epoch": 0.11407360102179293, "grad_norm": 0.24560160778896006, "learning_rate": 1.9631685923287345e-05, "loss": 0.2262, "step": 1429 }, { "epoch": 0.11415342859423645, "grad_norm": 0.3189323635422516, "learning_rate": 1.9630990375688997e-05, "loss": 0.2554, "step": 1430 }, { "epoch": 0.11423325616667997, "grad_norm": 0.2937561034517561, "learning_rate": 1.9630294184297057e-05, "loss": 0.2458, "step": 1431 }, { "epoch": 0.1143130837391235, "grad_norm": 0.28858861135301284, "learning_rate": 1.9629597349158058e-05, "loss": 0.2389, "step": 1432 }, { "epoch": 0.11439291131156702, "grad_norm": 0.3636767338495921, "learning_rate": 1.9628899870318586e-05, "loss": 0.2477, "step": 1433 }, { "epoch": 0.11447273888401054, "grad_norm": 0.27171231302548476, "learning_rate": 1.9628201747825266e-05, "loss": 0.2376, "step": 1434 }, { "epoch": 0.11455256645645406, "grad_norm": 0.29547398720357737, "learning_rate": 1.962750298172476e-05, "loss": 0.2799, "step": 1435 }, { "epoch": 0.11463239402889758, "grad_norm": 0.3281236423551327, "learning_rate": 1.962680357206378e-05, "loss": 0.2608, "step": 1436 }, { "epoch": 0.1147122216013411, "grad_norm": 0.3445392904072353, "learning_rate": 1.962610351888908e-05, "loss": 0.2702, "step": 1437 }, { "epoch": 0.11479204917378462, "grad_norm": 0.28401799040019293, "learning_rate": 1.962540282224745e-05, "loss": 0.2505, "step": 1438 }, { "epoch": 0.11487187674622815, "grad_norm": 0.38828880232120505, "learning_rate": 1.9624701482185738e-05, "loss": 0.2902, "step": 1439 }, { "epoch": 0.11495170431867167, "grad_norm": 0.30357671986421875, "learning_rate": 1.962399949875082e-05, "loss": 0.273, "step": 1440 }, { "epoch": 0.11503153189111519, "grad_norm": 0.2288887740119723, "learning_rate": 1.962329687198962e-05, "loss": 0.2895, "step": 1441 }, { "epoch": 0.11511135946355872, "grad_norm": 0.28825749807853296, "learning_rate": 1.962259360194911e-05, "loss": 0.2199, "step": 1442 }, { "epoch": 0.11519118703600223, "grad_norm": 0.3243621176780419, "learning_rate": 1.9621889688676298e-05, "loss": 0.3055, "step": 1443 }, { "epoch": 0.11527101460844576, "grad_norm": 0.2546727252531974, "learning_rate": 1.962118513221823e-05, "loss": 0.2959, "step": 1444 }, { "epoch": 0.11535084218088928, "grad_norm": 0.25702473556021416, "learning_rate": 1.962047993262202e-05, "loss": 0.2465, "step": 1445 }, { "epoch": 0.1154306697533328, "grad_norm": 0.340320187374759, "learning_rate": 1.9619774089934798e-05, "loss": 0.2665, "step": 1446 }, { "epoch": 0.11551049732577633, "grad_norm": 0.32329552885691754, "learning_rate": 1.961906760420375e-05, "loss": 0.2072, "step": 1447 }, { "epoch": 0.11559032489821984, "grad_norm": 0.2529458279775635, "learning_rate": 1.9618360475476093e-05, "loss": 0.2397, "step": 1448 }, { "epoch": 0.11567015247066337, "grad_norm": 0.3444082289332255, "learning_rate": 1.9617652703799102e-05, "loss": 0.2766, "step": 1449 }, { "epoch": 0.1157499800431069, "grad_norm": 0.3226097891494022, "learning_rate": 1.9616944289220093e-05, "loss": 0.2557, "step": 1450 }, { "epoch": 0.11582980761555041, "grad_norm": 0.2412432994628697, "learning_rate": 1.9616235231786415e-05, "loss": 0.2274, "step": 1451 }, { "epoch": 0.11590963518799394, "grad_norm": 0.2693280950082269, "learning_rate": 1.961552553154547e-05, "loss": 0.2593, "step": 1452 }, { "epoch": 0.11598946276043745, "grad_norm": 0.28180967220817643, "learning_rate": 1.961481518854469e-05, "loss": 0.3223, "step": 1453 }, { "epoch": 0.11606929033288098, "grad_norm": 0.21898937380061573, "learning_rate": 1.961410420283157e-05, "loss": 0.3366, "step": 1454 }, { "epoch": 0.1161491179053245, "grad_norm": 0.2595596634302402, "learning_rate": 1.9613392574453627e-05, "loss": 0.2516, "step": 1455 }, { "epoch": 0.11622894547776802, "grad_norm": 0.2532243268276946, "learning_rate": 1.961268030345844e-05, "loss": 0.2827, "step": 1456 }, { "epoch": 0.11630877305021155, "grad_norm": 0.3377687344473887, "learning_rate": 1.961196738989361e-05, "loss": 0.237, "step": 1457 }, { "epoch": 0.11638860062265506, "grad_norm": 0.24420440124923898, "learning_rate": 1.9611253833806803e-05, "loss": 0.2258, "step": 1458 }, { "epoch": 0.11646842819509859, "grad_norm": 0.3122443226495106, "learning_rate": 1.9610539635245713e-05, "loss": 0.2673, "step": 1459 }, { "epoch": 0.1165482557675421, "grad_norm": 0.33174498194979496, "learning_rate": 1.9609824794258077e-05, "loss": 0.2578, "step": 1460 }, { "epoch": 0.11662808333998563, "grad_norm": 0.28711153354649, "learning_rate": 1.9609109310891686e-05, "loss": 0.2708, "step": 1461 }, { "epoch": 0.11670791091242916, "grad_norm": 0.3027280245159316, "learning_rate": 1.9608393185194366e-05, "loss": 0.2253, "step": 1462 }, { "epoch": 0.11678773848487267, "grad_norm": 0.31245155560502347, "learning_rate": 1.9607676417213986e-05, "loss": 0.2118, "step": 1463 }, { "epoch": 0.1168675660573162, "grad_norm": 0.26440198863380193, "learning_rate": 1.960695900699846e-05, "loss": 0.2963, "step": 1464 }, { "epoch": 0.11694739362975971, "grad_norm": 0.27218846562725285, "learning_rate": 1.9606240954595738e-05, "loss": 0.282, "step": 1465 }, { "epoch": 0.11702722120220324, "grad_norm": 0.270064871760621, "learning_rate": 1.960552226005383e-05, "loss": 0.2553, "step": 1466 }, { "epoch": 0.11710704877464677, "grad_norm": 0.27675042235862674, "learning_rate": 1.9604802923420768e-05, "loss": 0.3007, "step": 1467 }, { "epoch": 0.11718687634709028, "grad_norm": 0.2212563343446765, "learning_rate": 1.9604082944744644e-05, "loss": 0.2392, "step": 1468 }, { "epoch": 0.11726670391953381, "grad_norm": 0.24894913541747488, "learning_rate": 1.960336232407358e-05, "loss": 0.2938, "step": 1469 }, { "epoch": 0.11734653149197732, "grad_norm": 0.24377757550868964, "learning_rate": 1.960264106145575e-05, "loss": 0.2429, "step": 1470 }, { "epoch": 0.11742635906442085, "grad_norm": 0.2706667116811902, "learning_rate": 1.9601919156939367e-05, "loss": 0.2467, "step": 1471 }, { "epoch": 0.11750618663686438, "grad_norm": 0.2604248723082867, "learning_rate": 1.960119661057269e-05, "loss": 0.2448, "step": 1472 }, { "epoch": 0.11758601420930789, "grad_norm": 0.23649886794165992, "learning_rate": 1.960047342240401e-05, "loss": 0.2693, "step": 1473 }, { "epoch": 0.11766584178175142, "grad_norm": 0.26541024824998444, "learning_rate": 1.959974959248168e-05, "loss": 0.2328, "step": 1474 }, { "epoch": 0.11774566935419493, "grad_norm": 0.26259002805717596, "learning_rate": 1.9599025120854076e-05, "loss": 0.236, "step": 1475 }, { "epoch": 0.11782549692663846, "grad_norm": 0.28453271668545943, "learning_rate": 1.9598300007569634e-05, "loss": 0.2485, "step": 1476 }, { "epoch": 0.11790532449908198, "grad_norm": 0.2827592826240223, "learning_rate": 1.9597574252676815e-05, "loss": 0.2424, "step": 1477 }, { "epoch": 0.1179851520715255, "grad_norm": 0.26051949587625145, "learning_rate": 1.9596847856224143e-05, "loss": 0.2422, "step": 1478 }, { "epoch": 0.11806497964396903, "grad_norm": 0.24365938911190796, "learning_rate": 1.959612081826017e-05, "loss": 0.2084, "step": 1479 }, { "epoch": 0.11814480721641255, "grad_norm": 0.38003834154642974, "learning_rate": 1.9595393138833497e-05, "loss": 0.259, "step": 1480 }, { "epoch": 0.11822463478885607, "grad_norm": 0.26313336688884986, "learning_rate": 1.9594664817992767e-05, "loss": 0.2293, "step": 1481 }, { "epoch": 0.11830446236129959, "grad_norm": 0.30034089288006066, "learning_rate": 1.959393585578666e-05, "loss": 0.2217, "step": 1482 }, { "epoch": 0.11838428993374311, "grad_norm": 0.378482976033964, "learning_rate": 1.9593206252263912e-05, "loss": 0.261, "step": 1483 }, { "epoch": 0.11846411750618664, "grad_norm": 0.3424147477353104, "learning_rate": 1.959247600747329e-05, "loss": 0.2261, "step": 1484 }, { "epoch": 0.11854394507863016, "grad_norm": 0.28892279250081415, "learning_rate": 1.9591745121463607e-05, "loss": 0.248, "step": 1485 }, { "epoch": 0.11862377265107368, "grad_norm": 0.32160605654705365, "learning_rate": 1.959101359428372e-05, "loss": 0.2555, "step": 1486 }, { "epoch": 0.1187036002235172, "grad_norm": 0.2945075725466649, "learning_rate": 1.959028142598253e-05, "loss": 0.3011, "step": 1487 }, { "epoch": 0.11878342779596072, "grad_norm": 0.31488272635637615, "learning_rate": 1.958954861660898e-05, "loss": 0.2298, "step": 1488 }, { "epoch": 0.11886325536840425, "grad_norm": 0.23939136728819665, "learning_rate": 1.9588815166212058e-05, "loss": 0.2715, "step": 1489 }, { "epoch": 0.11894308294084777, "grad_norm": 0.2791428654173272, "learning_rate": 1.9588081074840786e-05, "loss": 0.2201, "step": 1490 }, { "epoch": 0.1190229105132913, "grad_norm": 0.32485450304395985, "learning_rate": 1.958734634254424e-05, "loss": 0.2793, "step": 1491 }, { "epoch": 0.11910273808573481, "grad_norm": 0.31061771104667457, "learning_rate": 1.9586610969371532e-05, "loss": 0.2418, "step": 1492 }, { "epoch": 0.11918256565817834, "grad_norm": 0.24502492004257942, "learning_rate": 1.9585874955371816e-05, "loss": 0.2158, "step": 1493 }, { "epoch": 0.11926239323062186, "grad_norm": 0.38185074495075916, "learning_rate": 1.9585138300594297e-05, "loss": 0.2542, "step": 1494 }, { "epoch": 0.11934222080306538, "grad_norm": 0.2953543313883559, "learning_rate": 1.9584401005088216e-05, "loss": 0.2103, "step": 1495 }, { "epoch": 0.1194220483755089, "grad_norm": 0.21672921671894563, "learning_rate": 1.9583663068902856e-05, "loss": 0.2454, "step": 1496 }, { "epoch": 0.11950187594795242, "grad_norm": 0.28377517122845003, "learning_rate": 1.9582924492087546e-05, "loss": 0.224, "step": 1497 }, { "epoch": 0.11958170352039595, "grad_norm": 0.5025116478368944, "learning_rate": 1.958218527469166e-05, "loss": 0.2233, "step": 1498 }, { "epoch": 0.11966153109283946, "grad_norm": 0.2408306075096078, "learning_rate": 1.9581445416764606e-05, "loss": 0.2467, "step": 1499 }, { "epoch": 0.11974135866528299, "grad_norm": 0.3601073686324315, "learning_rate": 1.9580704918355847e-05, "loss": 0.2485, "step": 1500 }, { "epoch": 0.11982118623772652, "grad_norm": 0.30366183434752575, "learning_rate": 1.9579963779514878e-05, "loss": 0.3311, "step": 1501 }, { "epoch": 0.11990101381017003, "grad_norm": 0.2644767088389791, "learning_rate": 1.957922200029124e-05, "loss": 0.2779, "step": 1502 }, { "epoch": 0.11998084138261356, "grad_norm": 0.2169176284033141, "learning_rate": 1.9578479580734518e-05, "loss": 0.2206, "step": 1503 }, { "epoch": 0.12006066895505707, "grad_norm": 0.3947553526565337, "learning_rate": 1.957773652089435e-05, "loss": 0.2499, "step": 1504 }, { "epoch": 0.1201404965275006, "grad_norm": 0.31119881291449, "learning_rate": 1.9576992820820392e-05, "loss": 0.2195, "step": 1505 }, { "epoch": 0.12022032409994413, "grad_norm": 0.2280096142497346, "learning_rate": 1.957624848056237e-05, "loss": 0.2403, "step": 1506 }, { "epoch": 0.12030015167238764, "grad_norm": 0.36547711971573654, "learning_rate": 1.957550350017003e-05, "loss": 0.2695, "step": 1507 }, { "epoch": 0.12037997924483117, "grad_norm": 0.2392923388482618, "learning_rate": 1.9574757879693176e-05, "loss": 0.3053, "step": 1508 }, { "epoch": 0.12045980681727468, "grad_norm": 0.23347757960197696, "learning_rate": 1.9574011619181646e-05, "loss": 0.2698, "step": 1509 }, { "epoch": 0.12053963438971821, "grad_norm": 0.23728167602669, "learning_rate": 1.9573264718685334e-05, "loss": 0.2225, "step": 1510 }, { "epoch": 0.12061946196216174, "grad_norm": 0.24181565120417978, "learning_rate": 1.9572517178254156e-05, "loss": 0.2451, "step": 1511 }, { "epoch": 0.12069928953460525, "grad_norm": 0.2696314917017837, "learning_rate": 1.957176899793809e-05, "loss": 0.2442, "step": 1512 }, { "epoch": 0.12077911710704878, "grad_norm": 0.24005954868771595, "learning_rate": 1.9571020177787144e-05, "loss": 0.2508, "step": 1513 }, { "epoch": 0.12085894467949229, "grad_norm": 0.2242076439673292, "learning_rate": 1.9570270717851376e-05, "loss": 0.2905, "step": 1514 }, { "epoch": 0.12093877225193582, "grad_norm": 0.21353737955472973, "learning_rate": 1.956952061818089e-05, "loss": 0.2903, "step": 1515 }, { "epoch": 0.12101859982437935, "grad_norm": 0.2373063748729697, "learning_rate": 1.9568769878825814e-05, "loss": 0.2143, "step": 1516 }, { "epoch": 0.12109842739682286, "grad_norm": 0.2445608882349729, "learning_rate": 1.956801849983634e-05, "loss": 0.2501, "step": 1517 }, { "epoch": 0.12117825496926639, "grad_norm": 0.2505074604807907, "learning_rate": 1.9567266481262696e-05, "loss": 0.2243, "step": 1518 }, { "epoch": 0.1212580825417099, "grad_norm": 0.21881831223687867, "learning_rate": 1.9566513823155148e-05, "loss": 0.253, "step": 1519 }, { "epoch": 0.12133791011415343, "grad_norm": 0.24047736807269163, "learning_rate": 1.9565760525564014e-05, "loss": 0.2246, "step": 1520 }, { "epoch": 0.12141773768659694, "grad_norm": 0.26815632346139584, "learning_rate": 1.956500658853964e-05, "loss": 0.267, "step": 1521 }, { "epoch": 0.12149756525904047, "grad_norm": 0.2299479102392492, "learning_rate": 1.956425201213243e-05, "loss": 0.2385, "step": 1522 }, { "epoch": 0.121577392831484, "grad_norm": 0.2404784940800993, "learning_rate": 1.956349679639282e-05, "loss": 0.204, "step": 1523 }, { "epoch": 0.12165722040392751, "grad_norm": 0.23345284035844258, "learning_rate": 1.95627409413713e-05, "loss": 0.2546, "step": 1524 }, { "epoch": 0.12173704797637104, "grad_norm": 0.25255322862176255, "learning_rate": 1.956198444711839e-05, "loss": 0.2325, "step": 1525 }, { "epoch": 0.12181687554881455, "grad_norm": 0.23773139758286727, "learning_rate": 1.956122731368466e-05, "loss": 0.205, "step": 1526 }, { "epoch": 0.12189670312125808, "grad_norm": 0.38842474216501416, "learning_rate": 1.9560469541120722e-05, "loss": 0.2925, "step": 1527 }, { "epoch": 0.12197653069370161, "grad_norm": 0.287630382878101, "learning_rate": 1.955971112947723e-05, "loss": 0.2301, "step": 1528 }, { "epoch": 0.12205635826614512, "grad_norm": 0.22334922620118028, "learning_rate": 1.955895207880488e-05, "loss": 0.233, "step": 1529 }, { "epoch": 0.12213618583858865, "grad_norm": 0.2650578364375669, "learning_rate": 1.9558192389154417e-05, "loss": 0.2656, "step": 1530 }, { "epoch": 0.12221601341103216, "grad_norm": 0.2416828617988139, "learning_rate": 1.9557432060576613e-05, "loss": 0.2552, "step": 1531 }, { "epoch": 0.12229584098347569, "grad_norm": 0.23529901998553163, "learning_rate": 1.95566710931223e-05, "loss": 0.2531, "step": 1532 }, { "epoch": 0.12237566855591922, "grad_norm": 0.24770716863845593, "learning_rate": 1.955590948684235e-05, "loss": 0.197, "step": 1533 }, { "epoch": 0.12245549612836273, "grad_norm": 0.26789792465717016, "learning_rate": 1.955514724178766e-05, "loss": 0.3303, "step": 1534 }, { "epoch": 0.12253532370080626, "grad_norm": 0.2987135169594575, "learning_rate": 1.9554384358009192e-05, "loss": 0.2388, "step": 1535 }, { "epoch": 0.12261515127324978, "grad_norm": 0.24724412046902483, "learning_rate": 1.955362083555794e-05, "loss": 0.2354, "step": 1536 }, { "epoch": 0.1226949788456933, "grad_norm": 0.4188080376193021, "learning_rate": 1.9552856674484946e-05, "loss": 0.217, "step": 1537 }, { "epoch": 0.12277480641813683, "grad_norm": 0.3063146156215621, "learning_rate": 1.9552091874841287e-05, "loss": 0.235, "step": 1538 }, { "epoch": 0.12285463399058034, "grad_norm": 0.2445823116167131, "learning_rate": 1.955132643667809e-05, "loss": 0.2026, "step": 1539 }, { "epoch": 0.12293446156302387, "grad_norm": 0.32617843603435487, "learning_rate": 1.9550560360046512e-05, "loss": 0.2556, "step": 1540 }, { "epoch": 0.12301428913546739, "grad_norm": 0.34638512620807294, "learning_rate": 1.9549793644997776e-05, "loss": 0.2288, "step": 1541 }, { "epoch": 0.12309411670791091, "grad_norm": 0.240597712914975, "learning_rate": 1.9549026291583126e-05, "loss": 0.2411, "step": 1542 }, { "epoch": 0.12317394428035443, "grad_norm": 0.26688019177120004, "learning_rate": 1.954825829985386e-05, "loss": 0.2428, "step": 1543 }, { "epoch": 0.12325377185279796, "grad_norm": 0.32679595949349577, "learning_rate": 1.9547489669861307e-05, "loss": 0.2391, "step": 1544 }, { "epoch": 0.12333359942524148, "grad_norm": 0.27460180348981433, "learning_rate": 1.954672040165686e-05, "loss": 0.2649, "step": 1545 }, { "epoch": 0.123413426997685, "grad_norm": 0.2692875669169209, "learning_rate": 1.954595049529193e-05, "loss": 0.2261, "step": 1546 }, { "epoch": 0.12349325457012852, "grad_norm": 0.3913091905542566, "learning_rate": 1.9545179950817986e-05, "loss": 0.253, "step": 1547 }, { "epoch": 0.12357308214257204, "grad_norm": 0.2875006522611056, "learning_rate": 1.954440876828654e-05, "loss": 0.2609, "step": 1548 }, { "epoch": 0.12365290971501557, "grad_norm": 0.23707423298536132, "learning_rate": 1.9543636947749138e-05, "loss": 0.2334, "step": 1549 }, { "epoch": 0.1237327372874591, "grad_norm": 0.43534855202677386, "learning_rate": 1.9542864489257374e-05, "loss": 0.1536, "step": 1550 }, { "epoch": 0.12381256485990261, "grad_norm": 0.24034868373539744, "learning_rate": 1.9542091392862886e-05, "loss": 0.2624, "step": 1551 }, { "epoch": 0.12389239243234614, "grad_norm": 0.2669051734796923, "learning_rate": 1.9541317658617345e-05, "loss": 0.2109, "step": 1552 }, { "epoch": 0.12397222000478965, "grad_norm": 0.2890374722422179, "learning_rate": 1.954054328657248e-05, "loss": 0.2745, "step": 1553 }, { "epoch": 0.12405204757723318, "grad_norm": 0.30512735955418246, "learning_rate": 1.9539768276780055e-05, "loss": 0.2034, "step": 1554 }, { "epoch": 0.1241318751496767, "grad_norm": 0.2161408775749129, "learning_rate": 1.953899262929187e-05, "loss": 0.2286, "step": 1555 }, { "epoch": 0.12421170272212022, "grad_norm": 0.26160137621740465, "learning_rate": 1.9538216344159782e-05, "loss": 0.2275, "step": 1556 }, { "epoch": 0.12429153029456375, "grad_norm": 0.28071532916134595, "learning_rate": 1.9537439421435677e-05, "loss": 0.2474, "step": 1557 }, { "epoch": 0.12437135786700726, "grad_norm": 0.2445519280747606, "learning_rate": 1.9536661861171488e-05, "loss": 0.2518, "step": 1558 }, { "epoch": 0.12445118543945079, "grad_norm": 0.3257143179977984, "learning_rate": 1.9535883663419197e-05, "loss": 0.2626, "step": 1559 }, { "epoch": 0.12453101301189431, "grad_norm": 0.23875724098545603, "learning_rate": 1.9535104828230818e-05, "loss": 0.2596, "step": 1560 }, { "epoch": 0.12461084058433783, "grad_norm": 0.26531066270144843, "learning_rate": 1.9534325355658413e-05, "loss": 0.1961, "step": 1561 }, { "epoch": 0.12469066815678136, "grad_norm": 0.29394602327605496, "learning_rate": 1.9533545245754096e-05, "loss": 0.263, "step": 1562 }, { "epoch": 0.12477049572922487, "grad_norm": 0.30195964504149686, "learning_rate": 1.953276449857e-05, "loss": 0.2409, "step": 1563 }, { "epoch": 0.1248503233016684, "grad_norm": 0.2763354663844259, "learning_rate": 1.953198311415833e-05, "loss": 0.2824, "step": 1564 }, { "epoch": 0.12493015087411191, "grad_norm": 0.23187476576061308, "learning_rate": 1.953120109257131e-05, "loss": 0.2264, "step": 1565 }, { "epoch": 0.12500997844655545, "grad_norm": 0.27752288290077143, "learning_rate": 1.953041843386121e-05, "loss": 0.2512, "step": 1566 }, { "epoch": 0.12508980601899897, "grad_norm": 0.25466719454032094, "learning_rate": 1.952963513808036e-05, "loss": 0.2197, "step": 1567 }, { "epoch": 0.12516963359144248, "grad_norm": 0.25226404198510194, "learning_rate": 1.952885120528111e-05, "loss": 0.2123, "step": 1568 }, { "epoch": 0.125249461163886, "grad_norm": 0.23565813899761126, "learning_rate": 1.9528066635515865e-05, "loss": 0.2628, "step": 1569 }, { "epoch": 0.12532928873632954, "grad_norm": 0.22731534477131451, "learning_rate": 1.9527281428837077e-05, "loss": 0.2206, "step": 1570 }, { "epoch": 0.12540911630877305, "grad_norm": 0.30357421807378626, "learning_rate": 1.9526495585297225e-05, "loss": 0.2955, "step": 1571 }, { "epoch": 0.12548894388121656, "grad_norm": 0.22821202073971994, "learning_rate": 1.9525709104948846e-05, "loss": 0.2387, "step": 1572 }, { "epoch": 0.1255687714536601, "grad_norm": 0.2704283787731118, "learning_rate": 1.9524921987844508e-05, "loss": 0.2273, "step": 1573 }, { "epoch": 0.12564859902610362, "grad_norm": 0.29193679014995433, "learning_rate": 1.952413423403683e-05, "loss": 0.2139, "step": 1574 }, { "epoch": 0.12572842659854713, "grad_norm": 0.31521751475636123, "learning_rate": 1.9523345843578472e-05, "loss": 0.2135, "step": 1575 }, { "epoch": 0.12580825417099065, "grad_norm": 0.2782870109479925, "learning_rate": 1.952255681652213e-05, "loss": 0.2077, "step": 1576 }, { "epoch": 0.1258880817434342, "grad_norm": 0.4076408559376946, "learning_rate": 1.9521767152920553e-05, "loss": 0.2682, "step": 1577 }, { "epoch": 0.1259679093158777, "grad_norm": 0.24974347740003233, "learning_rate": 1.952097685282652e-05, "loss": 0.2388, "step": 1578 }, { "epoch": 0.12604773688832122, "grad_norm": 0.3024918905005087, "learning_rate": 1.952018591629286e-05, "loss": 0.2418, "step": 1579 }, { "epoch": 0.12612756446076476, "grad_norm": 0.3269040418437745, "learning_rate": 1.9519394343372454e-05, "loss": 0.215, "step": 1580 }, { "epoch": 0.12620739203320827, "grad_norm": 0.3020016133253483, "learning_rate": 1.9518602134118203e-05, "loss": 0.2381, "step": 1581 }, { "epoch": 0.12628721960565178, "grad_norm": 0.3341030984877887, "learning_rate": 1.951780928858307e-05, "loss": 0.2176, "step": 1582 }, { "epoch": 0.12636704717809533, "grad_norm": 0.3204112944229768, "learning_rate": 1.9517015806820052e-05, "loss": 0.2105, "step": 1583 }, { "epoch": 0.12644687475053884, "grad_norm": 0.24184315694365574, "learning_rate": 1.951622168888219e-05, "loss": 0.2075, "step": 1584 }, { "epoch": 0.12652670232298235, "grad_norm": 0.2564700806857274, "learning_rate": 1.951542693482257e-05, "loss": 0.2352, "step": 1585 }, { "epoch": 0.12660652989542587, "grad_norm": 0.26831276417290917, "learning_rate": 1.9514631544694314e-05, "loss": 0.2173, "step": 1586 }, { "epoch": 0.1266863574678694, "grad_norm": 0.2787344076061193, "learning_rate": 1.9513835518550594e-05, "loss": 0.2397, "step": 1587 }, { "epoch": 0.12676618504031292, "grad_norm": 0.2440982390370247, "learning_rate": 1.9513038856444617e-05, "loss": 0.2819, "step": 1588 }, { "epoch": 0.12684601261275644, "grad_norm": 0.28083014243007504, "learning_rate": 1.9512241558429643e-05, "loss": 0.2715, "step": 1589 }, { "epoch": 0.12692584018519998, "grad_norm": 0.3720717756525084, "learning_rate": 1.9511443624558963e-05, "loss": 0.224, "step": 1590 }, { "epoch": 0.1270056677576435, "grad_norm": 0.2929735851157525, "learning_rate": 1.9510645054885918e-05, "loss": 0.1993, "step": 1591 }, { "epoch": 0.127085495330087, "grad_norm": 0.35288429790103876, "learning_rate": 1.950984584946389e-05, "loss": 0.2187, "step": 1592 }, { "epoch": 0.12716532290253055, "grad_norm": 0.3063349464321301, "learning_rate": 1.95090460083463e-05, "loss": 0.2712, "step": 1593 }, { "epoch": 0.12724515047497406, "grad_norm": 0.3113356163774822, "learning_rate": 1.9508245531586613e-05, "loss": 0.2582, "step": 1594 }, { "epoch": 0.12732497804741758, "grad_norm": 0.30037755452081333, "learning_rate": 1.9507444419238343e-05, "loss": 0.2102, "step": 1595 }, { "epoch": 0.1274048056198611, "grad_norm": 0.3219470279720424, "learning_rate": 1.950664267135504e-05, "loss": 0.247, "step": 1596 }, { "epoch": 0.12748463319230463, "grad_norm": 0.35375169260115646, "learning_rate": 1.9505840287990292e-05, "loss": 0.2929, "step": 1597 }, { "epoch": 0.12756446076474814, "grad_norm": 0.3167008097476765, "learning_rate": 1.950503726919774e-05, "loss": 0.225, "step": 1598 }, { "epoch": 0.12764428833719166, "grad_norm": 0.31071176266890177, "learning_rate": 1.9504233615031066e-05, "loss": 0.2412, "step": 1599 }, { "epoch": 0.1277241159096352, "grad_norm": 0.2623208326253748, "learning_rate": 1.9503429325543984e-05, "loss": 0.2515, "step": 1600 }, { "epoch": 0.1278039434820787, "grad_norm": 0.41407710841166695, "learning_rate": 1.9502624400790262e-05, "loss": 0.2379, "step": 1601 }, { "epoch": 0.12788377105452223, "grad_norm": 0.31983431385150995, "learning_rate": 1.9501818840823707e-05, "loss": 0.2754, "step": 1602 }, { "epoch": 0.12796359862696574, "grad_norm": 0.3418408697988335, "learning_rate": 1.950101264569816e-05, "loss": 0.2441, "step": 1603 }, { "epoch": 0.12804342619940928, "grad_norm": 0.3626263310487534, "learning_rate": 1.950020581546752e-05, "loss": 0.2077, "step": 1604 }, { "epoch": 0.1281232537718528, "grad_norm": 0.2348663179434483, "learning_rate": 1.9499398350185717e-05, "loss": 0.2921, "step": 1605 }, { "epoch": 0.1282030813442963, "grad_norm": 0.2592514319469695, "learning_rate": 1.949859024990673e-05, "loss": 0.2515, "step": 1606 }, { "epoch": 0.12828290891673985, "grad_norm": 0.35011077518204897, "learning_rate": 1.9497781514684574e-05, "loss": 0.2014, "step": 1607 }, { "epoch": 0.12836273648918337, "grad_norm": 0.258309011997118, "learning_rate": 1.9496972144573308e-05, "loss": 0.2424, "step": 1608 }, { "epoch": 0.12844256406162688, "grad_norm": 0.24724658295192647, "learning_rate": 1.9496162139627036e-05, "loss": 0.2432, "step": 1609 }, { "epoch": 0.12852239163407042, "grad_norm": 0.30232972890054816, "learning_rate": 1.9495351499899908e-05, "loss": 0.2566, "step": 1610 }, { "epoch": 0.12860221920651393, "grad_norm": 0.3123573967248353, "learning_rate": 1.949454022544611e-05, "loss": 0.2666, "step": 1611 }, { "epoch": 0.12868204677895745, "grad_norm": 0.22945225180216122, "learning_rate": 1.9493728316319873e-05, "loss": 0.2882, "step": 1612 }, { "epoch": 0.12876187435140096, "grad_norm": 0.27693413021672136, "learning_rate": 1.9492915772575463e-05, "loss": 0.2185, "step": 1613 }, { "epoch": 0.1288417019238445, "grad_norm": 0.27964813510363434, "learning_rate": 1.9492102594267208e-05, "loss": 0.2268, "step": 1614 }, { "epoch": 0.12892152949628802, "grad_norm": 0.28836261708950023, "learning_rate": 1.9491288781449454e-05, "loss": 0.2383, "step": 1615 }, { "epoch": 0.12900135706873153, "grad_norm": 0.3003340611608078, "learning_rate": 1.9490474334176607e-05, "loss": 0.1997, "step": 1616 }, { "epoch": 0.12908118464117507, "grad_norm": 0.3814328210004682, "learning_rate": 1.948965925250311e-05, "loss": 0.2325, "step": 1617 }, { "epoch": 0.1291610122136186, "grad_norm": 0.2725669054729272, "learning_rate": 1.9488843536483445e-05, "loss": 0.2528, "step": 1618 }, { "epoch": 0.1292408397860621, "grad_norm": 0.285503985943703, "learning_rate": 1.9488027186172144e-05, "loss": 0.2266, "step": 1619 }, { "epoch": 0.12932066735850561, "grad_norm": 0.3457016884048828, "learning_rate": 1.948721020162377e-05, "loss": 0.3089, "step": 1620 }, { "epoch": 0.12940049493094916, "grad_norm": 0.3146493967626723, "learning_rate": 1.948639258289294e-05, "loss": 0.2206, "step": 1621 }, { "epoch": 0.12948032250339267, "grad_norm": 0.28946137116425397, "learning_rate": 1.948557433003431e-05, "loss": 0.2504, "step": 1622 }, { "epoch": 0.12956015007583618, "grad_norm": 0.276074492347589, "learning_rate": 1.9484755443102573e-05, "loss": 0.2181, "step": 1623 }, { "epoch": 0.12963997764827973, "grad_norm": 0.3104829867177689, "learning_rate": 1.948393592215247e-05, "loss": 0.2215, "step": 1624 }, { "epoch": 0.12971980522072324, "grad_norm": 0.2835398406017777, "learning_rate": 1.9483115767238782e-05, "loss": 0.2583, "step": 1625 }, { "epoch": 0.12979963279316675, "grad_norm": 0.2853545766422544, "learning_rate": 1.9482294978416334e-05, "loss": 0.2328, "step": 1626 }, { "epoch": 0.1298794603656103, "grad_norm": 0.28535395461709506, "learning_rate": 1.948147355573999e-05, "loss": 0.2399, "step": 1627 }, { "epoch": 0.1299592879380538, "grad_norm": 0.2741033188187557, "learning_rate": 1.9480651499264662e-05, "loss": 0.2706, "step": 1628 }, { "epoch": 0.13003911551049732, "grad_norm": 0.3032644984826846, "learning_rate": 1.94798288090453e-05, "loss": 0.2138, "step": 1629 }, { "epoch": 0.13011894308294084, "grad_norm": 0.2804348694902402, "learning_rate": 1.94790054851369e-05, "loss": 0.2431, "step": 1630 }, { "epoch": 0.13019877065538438, "grad_norm": 0.33282345731904184, "learning_rate": 1.9478181527594496e-05, "loss": 0.2507, "step": 1631 }, { "epoch": 0.1302785982278279, "grad_norm": 0.37320906692487643, "learning_rate": 1.9477356936473162e-05, "loss": 0.2409, "step": 1632 }, { "epoch": 0.1303584258002714, "grad_norm": 0.26973689162030995, "learning_rate": 1.9476531711828027e-05, "loss": 0.2616, "step": 1633 }, { "epoch": 0.13043825337271495, "grad_norm": 0.29936519252823646, "learning_rate": 1.9475705853714246e-05, "loss": 0.2354, "step": 1634 }, { "epoch": 0.13051808094515846, "grad_norm": 0.265273728790435, "learning_rate": 1.947487936218703e-05, "loss": 0.2313, "step": 1635 }, { "epoch": 0.13059790851760197, "grad_norm": 0.22636848582017394, "learning_rate": 1.9474052237301622e-05, "loss": 0.2508, "step": 1636 }, { "epoch": 0.13067773609004552, "grad_norm": 0.2577544177178263, "learning_rate": 1.947322447911332e-05, "loss": 0.2454, "step": 1637 }, { "epoch": 0.13075756366248903, "grad_norm": 0.26477983280813217, "learning_rate": 1.947239608767745e-05, "loss": 0.2028, "step": 1638 }, { "epoch": 0.13083739123493254, "grad_norm": 0.28865730029513037, "learning_rate": 1.9471567063049387e-05, "loss": 0.2503, "step": 1639 }, { "epoch": 0.13091721880737606, "grad_norm": 0.2600607828010107, "learning_rate": 1.9470737405284548e-05, "loss": 0.2558, "step": 1640 }, { "epoch": 0.1309970463798196, "grad_norm": 0.3072235337383466, "learning_rate": 1.946990711443839e-05, "loss": 0.2391, "step": 1641 }, { "epoch": 0.1310768739522631, "grad_norm": 0.3212541414222357, "learning_rate": 1.9469076190566422e-05, "loss": 0.2076, "step": 1642 }, { "epoch": 0.13115670152470663, "grad_norm": 0.27413060314080295, "learning_rate": 1.9468244633724184e-05, "loss": 0.2214, "step": 1643 }, { "epoch": 0.13123652909715017, "grad_norm": 0.2996752233780761, "learning_rate": 1.946741244396726e-05, "loss": 0.2438, "step": 1644 }, { "epoch": 0.13131635666959368, "grad_norm": 0.2412017509960082, "learning_rate": 1.946657962135128e-05, "loss": 0.2488, "step": 1645 }, { "epoch": 0.1313961842420372, "grad_norm": 0.4210621934600409, "learning_rate": 1.946574616593192e-05, "loss": 0.248, "step": 1646 }, { "epoch": 0.1314760118144807, "grad_norm": 0.3255646577320878, "learning_rate": 1.9464912077764885e-05, "loss": 0.2734, "step": 1647 }, { "epoch": 0.13155583938692425, "grad_norm": 0.2597621235601612, "learning_rate": 1.9464077356905937e-05, "loss": 0.2227, "step": 1648 }, { "epoch": 0.13163566695936776, "grad_norm": 0.30117727304646663, "learning_rate": 1.9463242003410868e-05, "loss": 0.2364, "step": 1649 }, { "epoch": 0.13171549453181128, "grad_norm": 0.35084150982009477, "learning_rate": 1.9462406017335524e-05, "loss": 0.2093, "step": 1650 }, { "epoch": 0.13179532210425482, "grad_norm": 0.2216791040731515, "learning_rate": 1.946156939873578e-05, "loss": 0.2395, "step": 1651 }, { "epoch": 0.13187514967669833, "grad_norm": 0.24231540718262062, "learning_rate": 1.946073214766757e-05, "loss": 0.2148, "step": 1652 }, { "epoch": 0.13195497724914185, "grad_norm": 0.2861049804491842, "learning_rate": 1.9459894264186852e-05, "loss": 0.2382, "step": 1653 }, { "epoch": 0.1320348048215854, "grad_norm": 0.2978909890304222, "learning_rate": 1.9459055748349643e-05, "loss": 0.2318, "step": 1654 }, { "epoch": 0.1321146323940289, "grad_norm": 0.28706914044206644, "learning_rate": 1.9458216600211988e-05, "loss": 0.2201, "step": 1655 }, { "epoch": 0.13219445996647242, "grad_norm": 0.34462103725118304, "learning_rate": 1.9457376819829985e-05, "loss": 0.2234, "step": 1656 }, { "epoch": 0.13227428753891593, "grad_norm": 0.28834999251698884, "learning_rate": 1.9456536407259768e-05, "loss": 0.2015, "step": 1657 }, { "epoch": 0.13235411511135947, "grad_norm": 0.29682905416577154, "learning_rate": 1.9455695362557515e-05, "loss": 0.2676, "step": 1658 }, { "epoch": 0.13243394268380299, "grad_norm": 0.2759308108500337, "learning_rate": 1.945485368577945e-05, "loss": 0.2507, "step": 1659 }, { "epoch": 0.1325137702562465, "grad_norm": 0.3302083170065318, "learning_rate": 1.9454011376981826e-05, "loss": 0.253, "step": 1660 }, { "epoch": 0.13259359782869004, "grad_norm": 0.28580843426053015, "learning_rate": 1.9453168436220963e-05, "loss": 0.2436, "step": 1661 }, { "epoch": 0.13267342540113355, "grad_norm": 0.3518350147492316, "learning_rate": 1.9452324863553196e-05, "loss": 0.2083, "step": 1662 }, { "epoch": 0.13275325297357707, "grad_norm": 0.2921732801516325, "learning_rate": 1.9451480659034917e-05, "loss": 0.2643, "step": 1663 }, { "epoch": 0.13283308054602058, "grad_norm": 0.2805010583270733, "learning_rate": 1.9450635822722565e-05, "loss": 0.2645, "step": 1664 }, { "epoch": 0.13291290811846412, "grad_norm": 0.30484900633513073, "learning_rate": 1.9449790354672604e-05, "loss": 0.2301, "step": 1665 }, { "epoch": 0.13299273569090764, "grad_norm": 0.36748042585344043, "learning_rate": 1.944894425494156e-05, "loss": 0.2416, "step": 1666 }, { "epoch": 0.13307256326335115, "grad_norm": 0.308460498818325, "learning_rate": 1.944809752358598e-05, "loss": 0.2517, "step": 1667 }, { "epoch": 0.1331523908357947, "grad_norm": 0.2843393324938702, "learning_rate": 1.944725016066247e-05, "loss": 0.2376, "step": 1668 }, { "epoch": 0.1332322184082382, "grad_norm": 0.3502660394103784, "learning_rate": 1.9446402166227678e-05, "loss": 0.2291, "step": 1669 }, { "epoch": 0.13331204598068172, "grad_norm": 0.23995470879622977, "learning_rate": 1.9445553540338286e-05, "loss": 0.2311, "step": 1670 }, { "epoch": 0.13339187355312526, "grad_norm": 0.28493131569854274, "learning_rate": 1.9444704283051012e-05, "loss": 0.2629, "step": 1671 }, { "epoch": 0.13347170112556878, "grad_norm": 0.3425158478092755, "learning_rate": 1.944385439442264e-05, "loss": 0.2709, "step": 1672 }, { "epoch": 0.1335515286980123, "grad_norm": 0.2864596082313145, "learning_rate": 1.9443003874509973e-05, "loss": 0.2502, "step": 1673 }, { "epoch": 0.1336313562704558, "grad_norm": 0.3242014328574535, "learning_rate": 1.9442152723369868e-05, "loss": 0.2541, "step": 1674 }, { "epoch": 0.13371118384289934, "grad_norm": 0.35530618084650933, "learning_rate": 1.9441300941059218e-05, "loss": 0.1938, "step": 1675 }, { "epoch": 0.13379101141534286, "grad_norm": 0.252325703064297, "learning_rate": 1.9440448527634962e-05, "loss": 0.231, "step": 1676 }, { "epoch": 0.13387083898778637, "grad_norm": 0.22237329696321761, "learning_rate": 1.9439595483154087e-05, "loss": 0.2281, "step": 1677 }, { "epoch": 0.13395066656022991, "grad_norm": 0.37628574467039444, "learning_rate": 1.9438741807673606e-05, "loss": 0.2259, "step": 1678 }, { "epoch": 0.13403049413267343, "grad_norm": 0.3353154330147177, "learning_rate": 1.943788750125059e-05, "loss": 0.2304, "step": 1679 }, { "epoch": 0.13411032170511694, "grad_norm": 0.22583731311888336, "learning_rate": 1.9437032563942144e-05, "loss": 0.244, "step": 1680 }, { "epoch": 0.13419014927756048, "grad_norm": 0.27373003146573704, "learning_rate": 1.9436176995805417e-05, "loss": 0.2499, "step": 1681 }, { "epoch": 0.134269976850004, "grad_norm": 0.35238878588708195, "learning_rate": 1.94353207968976e-05, "loss": 0.2375, "step": 1682 }, { "epoch": 0.1343498044224475, "grad_norm": 0.2654415321813867, "learning_rate": 1.943446396727593e-05, "loss": 0.219, "step": 1683 }, { "epoch": 0.13442963199489102, "grad_norm": 0.32298231999753935, "learning_rate": 1.943360650699768e-05, "loss": 0.2001, "step": 1684 }, { "epoch": 0.13450945956733457, "grad_norm": 0.30254836962207393, "learning_rate": 1.9432748416120164e-05, "loss": 0.3157, "step": 1685 }, { "epoch": 0.13458928713977808, "grad_norm": 0.2348757510865748, "learning_rate": 1.943188969470075e-05, "loss": 0.2219, "step": 1686 }, { "epoch": 0.1346691147122216, "grad_norm": 0.29721349781812656, "learning_rate": 1.9431030342796832e-05, "loss": 0.2764, "step": 1687 }, { "epoch": 0.13474894228466514, "grad_norm": 0.35342812113046196, "learning_rate": 1.9430170360465862e-05, "loss": 0.191, "step": 1688 }, { "epoch": 0.13482876985710865, "grad_norm": 0.24639602002290845, "learning_rate": 1.942930974776532e-05, "loss": 0.253, "step": 1689 }, { "epoch": 0.13490859742955216, "grad_norm": 0.3773636464751539, "learning_rate": 1.9428448504752737e-05, "loss": 0.2349, "step": 1690 }, { "epoch": 0.13498842500199568, "grad_norm": 0.2851859439660449, "learning_rate": 1.9427586631485683e-05, "loss": 0.2625, "step": 1691 }, { "epoch": 0.13506825257443922, "grad_norm": 0.3126471233799725, "learning_rate": 1.9426724128021773e-05, "loss": 0.227, "step": 1692 }, { "epoch": 0.13514808014688273, "grad_norm": 0.30204469646033316, "learning_rate": 1.9425860994418663e-05, "loss": 0.2042, "step": 1693 }, { "epoch": 0.13522790771932625, "grad_norm": 0.39192211860521525, "learning_rate": 1.9424997230734045e-05, "loss": 0.2741, "step": 1694 }, { "epoch": 0.1353077352917698, "grad_norm": 0.24986038953129092, "learning_rate": 1.9424132837025663e-05, "loss": 0.2281, "step": 1695 }, { "epoch": 0.1353875628642133, "grad_norm": 0.25107384607315947, "learning_rate": 1.9423267813351293e-05, "loss": 0.2615, "step": 1696 }, { "epoch": 0.13546739043665681, "grad_norm": 0.33223308687898495, "learning_rate": 1.942240215976876e-05, "loss": 0.2594, "step": 1697 }, { "epoch": 0.13554721800910036, "grad_norm": 0.30646989131228297, "learning_rate": 1.9421535876335936e-05, "loss": 0.2482, "step": 1698 }, { "epoch": 0.13562704558154387, "grad_norm": 0.24248148613691892, "learning_rate": 1.9420668963110723e-05, "loss": 0.2707, "step": 1699 }, { "epoch": 0.13570687315398738, "grad_norm": 0.23139032876371382, "learning_rate": 1.941980142015107e-05, "loss": 0.2355, "step": 1700 }, { "epoch": 0.1357867007264309, "grad_norm": 0.28243327855441946, "learning_rate": 1.941893324751497e-05, "loss": 0.2758, "step": 1701 }, { "epoch": 0.13586652829887444, "grad_norm": 0.34353572639378627, "learning_rate": 1.9418064445260458e-05, "loss": 0.2279, "step": 1702 }, { "epoch": 0.13594635587131795, "grad_norm": 0.2637746856227854, "learning_rate": 1.9417195013445605e-05, "loss": 0.2799, "step": 1703 }, { "epoch": 0.13602618344376147, "grad_norm": 0.3002070888854798, "learning_rate": 1.9416324952128538e-05, "loss": 0.2331, "step": 1704 }, { "epoch": 0.136106011016205, "grad_norm": 0.2293353336770063, "learning_rate": 1.941545426136741e-05, "loss": 0.2339, "step": 1705 }, { "epoch": 0.13618583858864852, "grad_norm": 0.28596914209259866, "learning_rate": 1.941458294122043e-05, "loss": 0.2153, "step": 1706 }, { "epoch": 0.13626566616109204, "grad_norm": 0.2953024865593668, "learning_rate": 1.941371099174583e-05, "loss": 0.2505, "step": 1707 }, { "epoch": 0.13634549373353555, "grad_norm": 0.3704697305886515, "learning_rate": 1.941283841300191e-05, "loss": 0.2618, "step": 1708 }, { "epoch": 0.1364253213059791, "grad_norm": 0.21977460513334254, "learning_rate": 1.941196520504699e-05, "loss": 0.2564, "step": 1709 }, { "epoch": 0.1365051488784226, "grad_norm": 0.2900414509922268, "learning_rate": 1.9411091367939443e-05, "loss": 0.3107, "step": 1710 }, { "epoch": 0.13658497645086612, "grad_norm": 0.318940062466602, "learning_rate": 1.9410216901737686e-05, "loss": 0.3049, "step": 1711 }, { "epoch": 0.13666480402330966, "grad_norm": 0.32575565400506207, "learning_rate": 1.9409341806500164e-05, "loss": 0.2679, "step": 1712 }, { "epoch": 0.13674463159575317, "grad_norm": 0.28652273944669854, "learning_rate": 1.940846608228538e-05, "loss": 0.2331, "step": 1713 }, { "epoch": 0.1368244591681967, "grad_norm": 0.2511266292688211, "learning_rate": 1.9407589729151874e-05, "loss": 0.2001, "step": 1714 }, { "epoch": 0.13690428674064023, "grad_norm": 0.31552252375497153, "learning_rate": 1.9406712747158223e-05, "loss": 0.2801, "step": 1715 }, { "epoch": 0.13698411431308374, "grad_norm": 0.3043985654915529, "learning_rate": 1.9405835136363054e-05, "loss": 0.2055, "step": 1716 }, { "epoch": 0.13706394188552726, "grad_norm": 0.24571891608463006, "learning_rate": 1.9404956896825025e-05, "loss": 0.1948, "step": 1717 }, { "epoch": 0.13714376945797077, "grad_norm": 0.2760927537700574, "learning_rate": 1.940407802860285e-05, "loss": 0.2637, "step": 1718 }, { "epoch": 0.1372235970304143, "grad_norm": 0.2516913330703124, "learning_rate": 1.940319853175527e-05, "loss": 0.2311, "step": 1719 }, { "epoch": 0.13730342460285783, "grad_norm": 0.3198996481707239, "learning_rate": 1.9402318406341086e-05, "loss": 0.2213, "step": 1720 }, { "epoch": 0.13738325217530134, "grad_norm": 0.268720066568432, "learning_rate": 1.9401437652419123e-05, "loss": 0.2363, "step": 1721 }, { "epoch": 0.13746307974774488, "grad_norm": 0.34002997967323745, "learning_rate": 1.9400556270048257e-05, "loss": 0.2264, "step": 1722 }, { "epoch": 0.1375429073201884, "grad_norm": 0.24220223904357496, "learning_rate": 1.939967425928741e-05, "loss": 0.2337, "step": 1723 }, { "epoch": 0.1376227348926319, "grad_norm": 0.2793172748978861, "learning_rate": 1.9398791620195535e-05, "loss": 0.2425, "step": 1724 }, { "epoch": 0.13770256246507542, "grad_norm": 0.3372399532026331, "learning_rate": 1.9397908352831633e-05, "loss": 0.2749, "step": 1725 }, { "epoch": 0.13778239003751896, "grad_norm": 0.25251608858129326, "learning_rate": 1.939702445725475e-05, "loss": 0.2922, "step": 1726 }, { "epoch": 0.13786221760996248, "grad_norm": 0.2900969211799986, "learning_rate": 1.9396139933523975e-05, "loss": 0.1936, "step": 1727 }, { "epoch": 0.137942045182406, "grad_norm": 0.34536060096279153, "learning_rate": 1.9395254781698424e-05, "loss": 0.275, "step": 1728 }, { "epoch": 0.13802187275484953, "grad_norm": 0.3066540115138444, "learning_rate": 1.9394369001837276e-05, "loss": 0.2438, "step": 1729 }, { "epoch": 0.13810170032729305, "grad_norm": 0.28311709712702504, "learning_rate": 1.9393482593999734e-05, "loss": 0.2489, "step": 1730 }, { "epoch": 0.13818152789973656, "grad_norm": 0.2845787567240693, "learning_rate": 1.9392595558245057e-05, "loss": 0.248, "step": 1731 }, { "epoch": 0.1382613554721801, "grad_norm": 0.2754442317778633, "learning_rate": 1.9391707894632538e-05, "loss": 0.2744, "step": 1732 }, { "epoch": 0.13834118304462362, "grad_norm": 0.26353129807678805, "learning_rate": 1.9390819603221513e-05, "loss": 0.1894, "step": 1733 }, { "epoch": 0.13842101061706713, "grad_norm": 0.24347522024088322, "learning_rate": 1.9389930684071358e-05, "loss": 0.2567, "step": 1734 }, { "epoch": 0.13850083818951064, "grad_norm": 0.3140236319626082, "learning_rate": 1.9389041137241498e-05, "loss": 0.1945, "step": 1735 }, { "epoch": 0.13858066576195419, "grad_norm": 0.30918190297906994, "learning_rate": 1.9388150962791396e-05, "loss": 0.2752, "step": 1736 }, { "epoch": 0.1386604933343977, "grad_norm": 0.31910010558803226, "learning_rate": 1.9387260160780555e-05, "loss": 0.2554, "step": 1737 }, { "epoch": 0.1387403209068412, "grad_norm": 0.2952924418149191, "learning_rate": 1.9386368731268518e-05, "loss": 0.2234, "step": 1738 }, { "epoch": 0.13882014847928476, "grad_norm": 0.3045260118648607, "learning_rate": 1.9385476674314883e-05, "loss": 0.2115, "step": 1739 }, { "epoch": 0.13889997605172827, "grad_norm": 0.28608774406213167, "learning_rate": 1.9384583989979272e-05, "loss": 0.2301, "step": 1740 }, { "epoch": 0.13897980362417178, "grad_norm": 0.31929105655686596, "learning_rate": 1.938369067832136e-05, "loss": 0.2122, "step": 1741 }, { "epoch": 0.13905963119661532, "grad_norm": 0.33418158784981405, "learning_rate": 1.9382796739400856e-05, "loss": 0.2665, "step": 1742 }, { "epoch": 0.13913945876905884, "grad_norm": 0.3124216091526971, "learning_rate": 1.938190217327753e-05, "loss": 0.2262, "step": 1743 }, { "epoch": 0.13921928634150235, "grad_norm": 0.3916853941171849, "learning_rate": 1.9381006980011166e-05, "loss": 0.2167, "step": 1744 }, { "epoch": 0.13929911391394587, "grad_norm": 0.27564041145626533, "learning_rate": 1.9380111159661612e-05, "loss": 0.2411, "step": 1745 }, { "epoch": 0.1393789414863894, "grad_norm": 0.26404766350591624, "learning_rate": 1.937921471228875e-05, "loss": 0.2141, "step": 1746 }, { "epoch": 0.13945876905883292, "grad_norm": 0.33238423917996346, "learning_rate": 1.93783176379525e-05, "loss": 0.2331, "step": 1747 }, { "epoch": 0.13953859663127643, "grad_norm": 0.24316188083798143, "learning_rate": 1.9377419936712832e-05, "loss": 0.1981, "step": 1748 }, { "epoch": 0.13961842420371998, "grad_norm": 0.28069966296061494, "learning_rate": 1.9376521608629748e-05, "loss": 0.2043, "step": 1749 }, { "epoch": 0.1396982517761635, "grad_norm": 0.3065756972884636, "learning_rate": 1.9375622653763303e-05, "loss": 0.2352, "step": 1750 }, { "epoch": 0.139778079348607, "grad_norm": 0.31443243404012766, "learning_rate": 1.9374723072173586e-05, "loss": 0.185, "step": 1751 }, { "epoch": 0.13985790692105052, "grad_norm": 0.2582775768129008, "learning_rate": 1.9373822863920732e-05, "loss": 0.1958, "step": 1752 }, { "epoch": 0.13993773449349406, "grad_norm": 0.2826313111593153, "learning_rate": 1.9372922029064918e-05, "loss": 0.2689, "step": 1753 }, { "epoch": 0.14001756206593757, "grad_norm": 0.2930960017521221, "learning_rate": 1.9372020567666355e-05, "loss": 0.2138, "step": 1754 }, { "epoch": 0.1400973896383811, "grad_norm": 0.26225028100099207, "learning_rate": 1.937111847978531e-05, "loss": 0.2283, "step": 1755 }, { "epoch": 0.14017721721082463, "grad_norm": 0.2513246573427821, "learning_rate": 1.937021576548208e-05, "loss": 0.1956, "step": 1756 }, { "epoch": 0.14025704478326814, "grad_norm": 0.30791032545397495, "learning_rate": 1.9369312424817005e-05, "loss": 0.2417, "step": 1757 }, { "epoch": 0.14033687235571166, "grad_norm": 0.2651579281435177, "learning_rate": 1.9368408457850474e-05, "loss": 0.2227, "step": 1758 }, { "epoch": 0.1404166999281552, "grad_norm": 0.2681044095945749, "learning_rate": 1.9367503864642913e-05, "loss": 0.2174, "step": 1759 }, { "epoch": 0.1404965275005987, "grad_norm": 0.29113985555277977, "learning_rate": 1.9366598645254788e-05, "loss": 0.2157, "step": 1760 }, { "epoch": 0.14057635507304223, "grad_norm": 0.2930462717012864, "learning_rate": 1.9365692799746613e-05, "loss": 0.2452, "step": 1761 }, { "epoch": 0.14065618264548574, "grad_norm": 0.30502495603465624, "learning_rate": 1.936478632817894e-05, "loss": 0.2036, "step": 1762 }, { "epoch": 0.14073601021792928, "grad_norm": 0.26000787254912255, "learning_rate": 1.9363879230612355e-05, "loss": 0.2296, "step": 1763 }, { "epoch": 0.1408158377903728, "grad_norm": 0.3009164211489364, "learning_rate": 1.9362971507107507e-05, "loss": 0.1827, "step": 1764 }, { "epoch": 0.1408956653628163, "grad_norm": 0.24283801821403012, "learning_rate": 1.9362063157725064e-05, "loss": 0.2504, "step": 1765 }, { "epoch": 0.14097549293525985, "grad_norm": 0.34645781381149393, "learning_rate": 1.936115418252575e-05, "loss": 0.3062, "step": 1766 }, { "epoch": 0.14105532050770336, "grad_norm": 0.23285071206785088, "learning_rate": 1.9360244581570323e-05, "loss": 0.2093, "step": 1767 }, { "epoch": 0.14113514808014688, "grad_norm": 0.3747120882326922, "learning_rate": 1.9359334354919587e-05, "loss": 0.2471, "step": 1768 }, { "epoch": 0.1412149756525904, "grad_norm": 0.27115526631486614, "learning_rate": 1.9358423502634393e-05, "loss": 0.2357, "step": 1769 }, { "epoch": 0.14129480322503393, "grad_norm": 0.2641643683367182, "learning_rate": 1.9357512024775617e-05, "loss": 0.2334, "step": 1770 }, { "epoch": 0.14137463079747745, "grad_norm": 0.2615909711769694, "learning_rate": 1.93565999214042e-05, "loss": 0.2344, "step": 1771 }, { "epoch": 0.14145445836992096, "grad_norm": 0.29777324835164604, "learning_rate": 1.9355687192581102e-05, "loss": 0.2606, "step": 1772 }, { "epoch": 0.1415342859423645, "grad_norm": 0.26679741374993377, "learning_rate": 1.9354773838367342e-05, "loss": 0.2716, "step": 1773 }, { "epoch": 0.14161411351480802, "grad_norm": 0.27069440000955347, "learning_rate": 1.935385985882397e-05, "loss": 0.2002, "step": 1774 }, { "epoch": 0.14169394108725153, "grad_norm": 0.2615436124418343, "learning_rate": 1.9352945254012084e-05, "loss": 0.2054, "step": 1775 }, { "epoch": 0.14177376865969507, "grad_norm": 0.31232739911331864, "learning_rate": 1.9352030023992824e-05, "loss": 0.2722, "step": 1776 }, { "epoch": 0.14185359623213858, "grad_norm": 0.30475311086935275, "learning_rate": 1.935111416882736e-05, "loss": 0.2187, "step": 1777 }, { "epoch": 0.1419334238045821, "grad_norm": 0.3719358561163144, "learning_rate": 1.9350197688576927e-05, "loss": 0.1966, "step": 1778 }, { "epoch": 0.1420132513770256, "grad_norm": 0.3462901884928945, "learning_rate": 1.9349280583302778e-05, "loss": 0.258, "step": 1779 }, { "epoch": 0.14209307894946915, "grad_norm": 0.30704321592167844, "learning_rate": 1.9348362853066223e-05, "loss": 0.2393, "step": 1780 }, { "epoch": 0.14217290652191267, "grad_norm": 0.2645335857693025, "learning_rate": 1.9347444497928603e-05, "loss": 0.2612, "step": 1781 }, { "epoch": 0.14225273409435618, "grad_norm": 0.3858479359662678, "learning_rate": 1.9346525517951313e-05, "loss": 0.2254, "step": 1782 }, { "epoch": 0.14233256166679972, "grad_norm": 0.28549354262530113, "learning_rate": 1.934560591319578e-05, "loss": 0.241, "step": 1783 }, { "epoch": 0.14241238923924324, "grad_norm": 0.24980003729960995, "learning_rate": 1.9344685683723477e-05, "loss": 0.2419, "step": 1784 }, { "epoch": 0.14249221681168675, "grad_norm": 0.35534186718767835, "learning_rate": 1.9343764829595914e-05, "loss": 0.2212, "step": 1785 }, { "epoch": 0.1425720443841303, "grad_norm": 0.28976982514590743, "learning_rate": 1.9342843350874655e-05, "loss": 0.2429, "step": 1786 }, { "epoch": 0.1426518719565738, "grad_norm": 0.25294448156531923, "learning_rate": 1.9341921247621286e-05, "loss": 0.2271, "step": 1787 }, { "epoch": 0.14273169952901732, "grad_norm": 0.3980046918850365, "learning_rate": 1.9340998519897454e-05, "loss": 0.2089, "step": 1788 }, { "epoch": 0.14281152710146083, "grad_norm": 0.271705715099576, "learning_rate": 1.9340075167764833e-05, "loss": 0.2214, "step": 1789 }, { "epoch": 0.14289135467390437, "grad_norm": 0.31531502855889687, "learning_rate": 1.9339151191285154e-05, "loss": 0.2401, "step": 1790 }, { "epoch": 0.1429711822463479, "grad_norm": 0.287186479877807, "learning_rate": 1.9338226590520178e-05, "loss": 0.2628, "step": 1791 }, { "epoch": 0.1430510098187914, "grad_norm": 0.31185051558173416, "learning_rate": 1.933730136553171e-05, "loss": 0.2333, "step": 1792 }, { "epoch": 0.14313083739123494, "grad_norm": 0.3672381114163903, "learning_rate": 1.9336375516381593e-05, "loss": 0.2444, "step": 1793 }, { "epoch": 0.14321066496367846, "grad_norm": 0.3028596321470419, "learning_rate": 1.9335449043131723e-05, "loss": 0.2149, "step": 1794 }, { "epoch": 0.14329049253612197, "grad_norm": 0.31832808480793223, "learning_rate": 1.933452194584403e-05, "loss": 0.1996, "step": 1795 }, { "epoch": 0.14337032010856549, "grad_norm": 0.2854476914454695, "learning_rate": 1.933359422458048e-05, "loss": 0.2508, "step": 1796 }, { "epoch": 0.14345014768100903, "grad_norm": 0.33207271647573655, "learning_rate": 1.93326658794031e-05, "loss": 0.243, "step": 1797 }, { "epoch": 0.14352997525345254, "grad_norm": 0.2694776997917339, "learning_rate": 1.9331736910373937e-05, "loss": 0.2762, "step": 1798 }, { "epoch": 0.14360980282589605, "grad_norm": 0.3509966238079476, "learning_rate": 1.933080731755509e-05, "loss": 0.2505, "step": 1799 }, { "epoch": 0.1436896303983396, "grad_norm": 0.2916460538152938, "learning_rate": 1.93298771010087e-05, "loss": 0.2529, "step": 1800 }, { "epoch": 0.1437694579707831, "grad_norm": 0.3084885000773401, "learning_rate": 1.9328946260796948e-05, "loss": 0.2192, "step": 1801 }, { "epoch": 0.14384928554322662, "grad_norm": 0.2841119931790696, "learning_rate": 1.932801479698206e-05, "loss": 0.26, "step": 1802 }, { "epoch": 0.14392911311567017, "grad_norm": 0.39386008105379144, "learning_rate": 1.932708270962629e-05, "loss": 0.1906, "step": 1803 }, { "epoch": 0.14400894068811368, "grad_norm": 0.33090985313521565, "learning_rate": 1.932614999879196e-05, "loss": 0.2464, "step": 1804 }, { "epoch": 0.1440887682605572, "grad_norm": 0.29915909743165703, "learning_rate": 1.932521666454141e-05, "loss": 0.2443, "step": 1805 }, { "epoch": 0.1441685958330007, "grad_norm": 0.2856255345870501, "learning_rate": 1.9324282706937026e-05, "loss": 0.2245, "step": 1806 }, { "epoch": 0.14424842340544425, "grad_norm": 0.281483413166732, "learning_rate": 1.9323348126041243e-05, "loss": 0.2441, "step": 1807 }, { "epoch": 0.14432825097788776, "grad_norm": 0.2726487620496647, "learning_rate": 1.9322412921916534e-05, "loss": 0.2261, "step": 1808 }, { "epoch": 0.14440807855033128, "grad_norm": 0.2924492343226611, "learning_rate": 1.9321477094625417e-05, "loss": 0.2506, "step": 1809 }, { "epoch": 0.14448790612277482, "grad_norm": 0.2926028413256234, "learning_rate": 1.9320540644230442e-05, "loss": 0.2195, "step": 1810 }, { "epoch": 0.14456773369521833, "grad_norm": 0.31642027977095855, "learning_rate": 1.9319603570794214e-05, "loss": 0.2341, "step": 1811 }, { "epoch": 0.14464756126766184, "grad_norm": 0.2644865841788457, "learning_rate": 1.9318665874379368e-05, "loss": 0.2229, "step": 1812 }, { "epoch": 0.14472738884010536, "grad_norm": 0.3296339640380945, "learning_rate": 1.9317727555048585e-05, "loss": 0.2675, "step": 1813 }, { "epoch": 0.1448072164125489, "grad_norm": 0.3147490674571474, "learning_rate": 1.931678861286459e-05, "loss": 0.2377, "step": 1814 }, { "epoch": 0.14488704398499241, "grad_norm": 0.2932261045311067, "learning_rate": 1.9315849047890145e-05, "loss": 0.2742, "step": 1815 }, { "epoch": 0.14496687155743593, "grad_norm": 0.23733268857184564, "learning_rate": 1.931490886018806e-05, "loss": 0.2874, "step": 1816 }, { "epoch": 0.14504669912987947, "grad_norm": 0.231250272260752, "learning_rate": 1.9313968049821178e-05, "loss": 0.2263, "step": 1817 }, { "epoch": 0.14512652670232298, "grad_norm": 0.3320154071154961, "learning_rate": 1.931302661685239e-05, "loss": 0.272, "step": 1818 }, { "epoch": 0.1452063542747665, "grad_norm": 0.35302899941454624, "learning_rate": 1.9312084561344635e-05, "loss": 0.2125, "step": 1819 }, { "epoch": 0.14528618184721004, "grad_norm": 0.35769791153390423, "learning_rate": 1.9311141883360873e-05, "loss": 0.298, "step": 1820 }, { "epoch": 0.14536600941965355, "grad_norm": 0.28696905907382836, "learning_rate": 1.931019858296413e-05, "loss": 0.2304, "step": 1821 }, { "epoch": 0.14544583699209707, "grad_norm": 0.27144124800104885, "learning_rate": 1.9309254660217448e-05, "loss": 0.2336, "step": 1822 }, { "epoch": 0.14552566456454058, "grad_norm": 0.2783840148578777, "learning_rate": 1.9308310115183938e-05, "loss": 0.269, "step": 1823 }, { "epoch": 0.14560549213698412, "grad_norm": 0.2789644673082068, "learning_rate": 1.9307364947926734e-05, "loss": 0.2479, "step": 1824 }, { "epoch": 0.14568531970942764, "grad_norm": 0.27140236010357993, "learning_rate": 1.9306419158509013e-05, "loss": 0.2123, "step": 1825 }, { "epoch": 0.14576514728187115, "grad_norm": 0.277093709389878, "learning_rate": 1.9305472746994003e-05, "loss": 0.2302, "step": 1826 }, { "epoch": 0.1458449748543147, "grad_norm": 0.3099702126798092, "learning_rate": 1.9304525713444965e-05, "loss": 0.2087, "step": 1827 }, { "epoch": 0.1459248024267582, "grad_norm": 0.30460976897590125, "learning_rate": 1.9303578057925204e-05, "loss": 0.2512, "step": 1828 }, { "epoch": 0.14600462999920172, "grad_norm": 0.279807307575084, "learning_rate": 1.930262978049807e-05, "loss": 0.2364, "step": 1829 }, { "epoch": 0.14608445757164526, "grad_norm": 0.29094607759282654, "learning_rate": 1.930168088122695e-05, "loss": 0.2343, "step": 1830 }, { "epoch": 0.14616428514408877, "grad_norm": 0.29424191033472846, "learning_rate": 1.9300731360175273e-05, "loss": 0.2301, "step": 1831 }, { "epoch": 0.1462441127165323, "grad_norm": 0.29530339278187445, "learning_rate": 1.929978121740651e-05, "loss": 0.1909, "step": 1832 }, { "epoch": 0.1463239402889758, "grad_norm": 0.338949315484635, "learning_rate": 1.9298830452984175e-05, "loss": 0.3222, "step": 1833 }, { "epoch": 0.14640376786141934, "grad_norm": 0.395186277172204, "learning_rate": 1.929787906697183e-05, "loss": 0.2022, "step": 1834 }, { "epoch": 0.14648359543386286, "grad_norm": 0.27199068102354973, "learning_rate": 1.9296927059433058e-05, "loss": 0.208, "step": 1835 }, { "epoch": 0.14656342300630637, "grad_norm": 0.2695415033269523, "learning_rate": 1.9295974430431507e-05, "loss": 0.1959, "step": 1836 }, { "epoch": 0.1466432505787499, "grad_norm": 0.23104704287358993, "learning_rate": 1.929502118003085e-05, "loss": 0.2508, "step": 1837 }, { "epoch": 0.14672307815119343, "grad_norm": 0.2784183508064453, "learning_rate": 1.9294067308294817e-05, "loss": 0.242, "step": 1838 }, { "epoch": 0.14680290572363694, "grad_norm": 0.2804989428262968, "learning_rate": 1.929311281528716e-05, "loss": 0.2869, "step": 1839 }, { "epoch": 0.14688273329608045, "grad_norm": 0.373857713842741, "learning_rate": 1.929215770107169e-05, "loss": 0.3149, "step": 1840 }, { "epoch": 0.146962560868524, "grad_norm": 0.23538950268533332, "learning_rate": 1.9291201965712253e-05, "loss": 0.2647, "step": 1841 }, { "epoch": 0.1470423884409675, "grad_norm": 0.25809971097254575, "learning_rate": 1.9290245609272734e-05, "loss": 0.2066, "step": 1842 }, { "epoch": 0.14712221601341102, "grad_norm": 0.3011846884433556, "learning_rate": 1.9289288631817057e-05, "loss": 0.2719, "step": 1843 }, { "epoch": 0.14720204358585456, "grad_norm": 0.3045067833671756, "learning_rate": 1.92883310334092e-05, "loss": 0.2443, "step": 1844 }, { "epoch": 0.14728187115829808, "grad_norm": 0.276836913833989, "learning_rate": 1.928737281411317e-05, "loss": 0.2222, "step": 1845 }, { "epoch": 0.1473616987307416, "grad_norm": 0.27795004791801786, "learning_rate": 1.928641397399302e-05, "loss": 0.2324, "step": 1846 }, { "epoch": 0.14744152630318513, "grad_norm": 0.3425157335421307, "learning_rate": 1.928545451311285e-05, "loss": 0.2668, "step": 1847 }, { "epoch": 0.14752135387562865, "grad_norm": 0.3010137602109028, "learning_rate": 1.928449443153679e-05, "loss": 0.204, "step": 1848 }, { "epoch": 0.14760118144807216, "grad_norm": 0.32702054807230696, "learning_rate": 1.9283533729329015e-05, "loss": 0.2448, "step": 1849 }, { "epoch": 0.14768100902051567, "grad_norm": 0.4814133227590728, "learning_rate": 1.9282572406553755e-05, "loss": 0.1959, "step": 1850 }, { "epoch": 0.14776083659295922, "grad_norm": 0.32966579221270287, "learning_rate": 1.9281610463275268e-05, "loss": 0.1886, "step": 1851 }, { "epoch": 0.14784066416540273, "grad_norm": 0.36159767136221554, "learning_rate": 1.9280647899557845e-05, "loss": 0.2066, "step": 1852 }, { "epoch": 0.14792049173784624, "grad_norm": 0.4337669452380557, "learning_rate": 1.927968471546584e-05, "loss": 0.2221, "step": 1853 }, { "epoch": 0.14800031931028979, "grad_norm": 0.47101605514717604, "learning_rate": 1.9278720911063637e-05, "loss": 0.1987, "step": 1854 }, { "epoch": 0.1480801468827333, "grad_norm": 0.3743341892927465, "learning_rate": 1.927775648641566e-05, "loss": 0.2447, "step": 1855 }, { "epoch": 0.1481599744551768, "grad_norm": 0.426444702969414, "learning_rate": 1.9276791441586377e-05, "loss": 0.2508, "step": 1856 }, { "epoch": 0.14823980202762033, "grad_norm": 0.3054988070195838, "learning_rate": 1.92758257766403e-05, "loss": 0.2403, "step": 1857 }, { "epoch": 0.14831962960006387, "grad_norm": 0.30256457012728627, "learning_rate": 1.9274859491641977e-05, "loss": 0.2382, "step": 1858 }, { "epoch": 0.14839945717250738, "grad_norm": 0.37502829071776195, "learning_rate": 1.9273892586656002e-05, "loss": 0.2367, "step": 1859 }, { "epoch": 0.1484792847449509, "grad_norm": 0.298765950728475, "learning_rate": 1.927292506174701e-05, "loss": 0.2185, "step": 1860 }, { "epoch": 0.14855911231739444, "grad_norm": 0.4035364063443522, "learning_rate": 1.9271956916979672e-05, "loss": 0.2025, "step": 1861 }, { "epoch": 0.14863893988983795, "grad_norm": 0.2522128930741751, "learning_rate": 1.9270988152418706e-05, "loss": 0.2547, "step": 1862 }, { "epoch": 0.14871876746228146, "grad_norm": 0.2661935627319245, "learning_rate": 1.9270018768128877e-05, "loss": 0.3234, "step": 1863 }, { "epoch": 0.148798595034725, "grad_norm": 0.3057920652287855, "learning_rate": 1.9269048764174974e-05, "loss": 0.227, "step": 1864 }, { "epoch": 0.14887842260716852, "grad_norm": 0.22477053541286462, "learning_rate": 1.926807814062184e-05, "loss": 0.289, "step": 1865 }, { "epoch": 0.14895825017961203, "grad_norm": 0.3030210890658266, "learning_rate": 1.9267106897534366e-05, "loss": 0.1801, "step": 1866 }, { "epoch": 0.14903807775205555, "grad_norm": 0.4407758045888959, "learning_rate": 1.9266135034977467e-05, "loss": 0.2379, "step": 1867 }, { "epoch": 0.1491179053244991, "grad_norm": 0.2635955904360116, "learning_rate": 1.926516255301611e-05, "loss": 0.2301, "step": 1868 }, { "epoch": 0.1491977328969426, "grad_norm": 0.32676517262236915, "learning_rate": 1.9264189451715306e-05, "loss": 0.2187, "step": 1869 }, { "epoch": 0.14927756046938612, "grad_norm": 0.25644701360834804, "learning_rate": 1.9263215731140093e-05, "loss": 0.2449, "step": 1870 }, { "epoch": 0.14935738804182966, "grad_norm": 0.27478741528188766, "learning_rate": 1.9262241391355573e-05, "loss": 0.2394, "step": 1871 }, { "epoch": 0.14943721561427317, "grad_norm": 0.2716063040984868, "learning_rate": 1.926126643242687e-05, "loss": 0.2099, "step": 1872 }, { "epoch": 0.14951704318671669, "grad_norm": 0.2780067954735612, "learning_rate": 1.926029085441915e-05, "loss": 0.2423, "step": 1873 }, { "epoch": 0.14959687075916023, "grad_norm": 0.2693702580334738, "learning_rate": 1.925931465739764e-05, "loss": 0.2494, "step": 1874 }, { "epoch": 0.14967669833160374, "grad_norm": 0.2814111677772333, "learning_rate": 1.9258337841427588e-05, "loss": 0.2156, "step": 1875 }, { "epoch": 0.14975652590404726, "grad_norm": 0.34239080665116256, "learning_rate": 1.925736040657429e-05, "loss": 0.1867, "step": 1876 }, { "epoch": 0.14983635347649077, "grad_norm": 0.3530130905255927, "learning_rate": 1.9256382352903086e-05, "loss": 0.1854, "step": 1877 }, { "epoch": 0.1499161810489343, "grad_norm": 0.36699700404973545, "learning_rate": 1.9255403680479348e-05, "loss": 0.2347, "step": 1878 }, { "epoch": 0.14999600862137782, "grad_norm": 0.3690499010410638, "learning_rate": 1.9254424389368505e-05, "loss": 0.2203, "step": 1879 }, { "epoch": 0.15007583619382134, "grad_norm": 0.2827679852097342, "learning_rate": 1.9253444479636013e-05, "loss": 0.2078, "step": 1880 }, { "epoch": 0.15015566376626488, "grad_norm": 0.27882592315331367, "learning_rate": 1.925246395134738e-05, "loss": 0.2157, "step": 1881 }, { "epoch": 0.1502354913387084, "grad_norm": 0.27236033713648505, "learning_rate": 1.9251482804568144e-05, "loss": 0.2297, "step": 1882 }, { "epoch": 0.1503153189111519, "grad_norm": 0.3504057839075102, "learning_rate": 1.9250501039363895e-05, "loss": 0.1964, "step": 1883 }, { "epoch": 0.15039514648359542, "grad_norm": 0.31866921966115913, "learning_rate": 1.9249518655800262e-05, "loss": 0.2386, "step": 1884 }, { "epoch": 0.15047497405603896, "grad_norm": 0.33006069203038635, "learning_rate": 1.9248535653942913e-05, "loss": 0.2097, "step": 1885 }, { "epoch": 0.15055480162848248, "grad_norm": 0.396891262820491, "learning_rate": 1.9247552033857554e-05, "loss": 0.1886, "step": 1886 }, { "epoch": 0.150634629200926, "grad_norm": 0.3028315934575601, "learning_rate": 1.9246567795609936e-05, "loss": 0.1813, "step": 1887 }, { "epoch": 0.15071445677336953, "grad_norm": 0.2859484939092699, "learning_rate": 1.9245582939265855e-05, "loss": 0.2701, "step": 1888 }, { "epoch": 0.15079428434581305, "grad_norm": 0.31272526314629956, "learning_rate": 1.9244597464891144e-05, "loss": 0.2029, "step": 1889 }, { "epoch": 0.15087411191825656, "grad_norm": 0.2804185075840273, "learning_rate": 1.9243611372551677e-05, "loss": 0.1985, "step": 1890 }, { "epoch": 0.1509539394907001, "grad_norm": 0.2765131047362252, "learning_rate": 1.9242624662313368e-05, "loss": 0.2474, "step": 1891 }, { "epoch": 0.15103376706314361, "grad_norm": 0.25762212670577916, "learning_rate": 1.924163733424218e-05, "loss": 0.2426, "step": 1892 }, { "epoch": 0.15111359463558713, "grad_norm": 0.3015769587052692, "learning_rate": 1.924064938840411e-05, "loss": 0.2488, "step": 1893 }, { "epoch": 0.15119342220803064, "grad_norm": 0.3379746929207126, "learning_rate": 1.9239660824865192e-05, "loss": 0.2308, "step": 1894 }, { "epoch": 0.15127324978047418, "grad_norm": 0.276022865185677, "learning_rate": 1.9238671643691518e-05, "loss": 0.1912, "step": 1895 }, { "epoch": 0.1513530773529177, "grad_norm": 0.2817509755723901, "learning_rate": 1.9237681844949204e-05, "loss": 0.2569, "step": 1896 }, { "epoch": 0.1514329049253612, "grad_norm": 0.45029180458689116, "learning_rate": 1.923669142870442e-05, "loss": 0.1936, "step": 1897 }, { "epoch": 0.15151273249780475, "grad_norm": 0.2992360647818064, "learning_rate": 1.9235700395023366e-05, "loss": 0.2037, "step": 1898 }, { "epoch": 0.15159256007024827, "grad_norm": 0.3320497004878461, "learning_rate": 1.9234708743972286e-05, "loss": 0.2376, "step": 1899 }, { "epoch": 0.15167238764269178, "grad_norm": 0.31014145744407134, "learning_rate": 1.9233716475617476e-05, "loss": 0.2099, "step": 1900 }, { "epoch": 0.1517522152151353, "grad_norm": 0.2604623978747926, "learning_rate": 1.923272359002526e-05, "loss": 0.2095, "step": 1901 }, { "epoch": 0.15183204278757884, "grad_norm": 0.329416323719604, "learning_rate": 1.923173008726201e-05, "loss": 0.1952, "step": 1902 }, { "epoch": 0.15191187036002235, "grad_norm": 0.27460397953137855, "learning_rate": 1.9230735967394135e-05, "loss": 0.2535, "step": 1903 }, { "epoch": 0.15199169793246586, "grad_norm": 0.25655129806719906, "learning_rate": 1.9229741230488096e-05, "loss": 0.2497, "step": 1904 }, { "epoch": 0.1520715255049094, "grad_norm": 0.3577188848234623, "learning_rate": 1.9228745876610376e-05, "loss": 0.2198, "step": 1905 }, { "epoch": 0.15215135307735292, "grad_norm": 0.28919238733703606, "learning_rate": 1.922774990582752e-05, "loss": 0.219, "step": 1906 }, { "epoch": 0.15223118064979643, "grad_norm": 0.3486921259474692, "learning_rate": 1.92267533182061e-05, "loss": 0.19, "step": 1907 }, { "epoch": 0.15231100822223997, "grad_norm": 0.28542336527318346, "learning_rate": 1.9225756113812735e-05, "loss": 0.2119, "step": 1908 }, { "epoch": 0.1523908357946835, "grad_norm": 0.31549879734501435, "learning_rate": 1.9224758292714083e-05, "loss": 0.2196, "step": 1909 }, { "epoch": 0.152470663367127, "grad_norm": 0.5050695094580687, "learning_rate": 1.9223759854976847e-05, "loss": 0.2456, "step": 1910 }, { "epoch": 0.15255049093957052, "grad_norm": 0.25450351708145375, "learning_rate": 1.9222760800667765e-05, "loss": 0.2346, "step": 1911 }, { "epoch": 0.15263031851201406, "grad_norm": 0.25537453764595236, "learning_rate": 1.9221761129853623e-05, "loss": 0.2446, "step": 1912 }, { "epoch": 0.15271014608445757, "grad_norm": 0.33415565254444435, "learning_rate": 1.9220760842601245e-05, "loss": 0.2339, "step": 1913 }, { "epoch": 0.15278997365690108, "grad_norm": 0.3718263556848851, "learning_rate": 1.9219759938977492e-05, "loss": 0.2078, "step": 1914 }, { "epoch": 0.15286980122934463, "grad_norm": 0.26819178666159044, "learning_rate": 1.9218758419049278e-05, "loss": 0.1926, "step": 1915 }, { "epoch": 0.15294962880178814, "grad_norm": 0.34020372801052784, "learning_rate": 1.9217756282883543e-05, "loss": 0.2036, "step": 1916 }, { "epoch": 0.15302945637423165, "grad_norm": 0.3383395948555955, "learning_rate": 1.921675353054728e-05, "loss": 0.2987, "step": 1917 }, { "epoch": 0.1531092839466752, "grad_norm": 0.29900758821304846, "learning_rate": 1.9215750162107517e-05, "loss": 0.2146, "step": 1918 }, { "epoch": 0.1531891115191187, "grad_norm": 0.30871581698422285, "learning_rate": 1.9214746177631327e-05, "loss": 0.2089, "step": 1919 }, { "epoch": 0.15326893909156222, "grad_norm": 0.29321881504079694, "learning_rate": 1.9213741577185822e-05, "loss": 0.2429, "step": 1920 }, { "epoch": 0.15334876666400574, "grad_norm": 0.33134953177544973, "learning_rate": 1.9212736360838155e-05, "loss": 0.2197, "step": 1921 }, { "epoch": 0.15342859423644928, "grad_norm": 0.2486403599272006, "learning_rate": 1.921173052865552e-05, "loss": 0.2498, "step": 1922 }, { "epoch": 0.1535084218088928, "grad_norm": 0.26378343476161153, "learning_rate": 1.9210724080705155e-05, "loss": 0.1799, "step": 1923 }, { "epoch": 0.1535882493813363, "grad_norm": 0.31686672817811257, "learning_rate": 1.9209717017054334e-05, "loss": 0.2658, "step": 1924 }, { "epoch": 0.15366807695377985, "grad_norm": 0.3331149181160672, "learning_rate": 1.920870933777038e-05, "loss": 0.2036, "step": 1925 }, { "epoch": 0.15374790452622336, "grad_norm": 0.2754272354847625, "learning_rate": 1.920770104292065e-05, "loss": 0.2552, "step": 1926 }, { "epoch": 0.15382773209866688, "grad_norm": 0.30178340747808846, "learning_rate": 1.920669213257254e-05, "loss": 0.2336, "step": 1927 }, { "epoch": 0.1539075596711104, "grad_norm": 0.3077364036948882, "learning_rate": 1.9205682606793497e-05, "loss": 0.1797, "step": 1928 }, { "epoch": 0.15398738724355393, "grad_norm": 0.32259709084345867, "learning_rate": 1.9204672465651005e-05, "loss": 0.2232, "step": 1929 }, { "epoch": 0.15406721481599744, "grad_norm": 0.33678860075802347, "learning_rate": 1.9203661709212583e-05, "loss": 0.2226, "step": 1930 }, { "epoch": 0.15414704238844096, "grad_norm": 0.37323894479997444, "learning_rate": 1.9202650337545802e-05, "loss": 0.2105, "step": 1931 }, { "epoch": 0.1542268699608845, "grad_norm": 0.24059024800494677, "learning_rate": 1.920163835071826e-05, "loss": 0.2115, "step": 1932 }, { "epoch": 0.154306697533328, "grad_norm": 0.30611366254543804, "learning_rate": 1.9200625748797612e-05, "loss": 0.1801, "step": 1933 }, { "epoch": 0.15438652510577153, "grad_norm": 0.3738950678936715, "learning_rate": 1.919961253185154e-05, "loss": 0.2329, "step": 1934 }, { "epoch": 0.15446635267821507, "grad_norm": 0.2731049632114704, "learning_rate": 1.9198598699947784e-05, "loss": 0.2363, "step": 1935 }, { "epoch": 0.15454618025065858, "grad_norm": 0.3665752755677801, "learning_rate": 1.9197584253154103e-05, "loss": 0.2343, "step": 1936 }, { "epoch": 0.1546260078231021, "grad_norm": 0.3351072940653784, "learning_rate": 1.9196569191538314e-05, "loss": 0.2229, "step": 1937 }, { "epoch": 0.1547058353955456, "grad_norm": 0.31223041916194305, "learning_rate": 1.919555351516827e-05, "loss": 0.2423, "step": 1938 }, { "epoch": 0.15478566296798915, "grad_norm": 0.23143448108715098, "learning_rate": 1.919453722411186e-05, "loss": 0.23, "step": 1939 }, { "epoch": 0.15486549054043267, "grad_norm": 0.34071860203795745, "learning_rate": 1.919352031843703e-05, "loss": 0.264, "step": 1940 }, { "epoch": 0.15494531811287618, "grad_norm": 0.3434443916031468, "learning_rate": 1.9192502798211742e-05, "loss": 0.249, "step": 1941 }, { "epoch": 0.15502514568531972, "grad_norm": 0.2881676104469019, "learning_rate": 1.919148466350403e-05, "loss": 0.2151, "step": 1942 }, { "epoch": 0.15510497325776323, "grad_norm": 0.29186659017599365, "learning_rate": 1.9190465914381936e-05, "loss": 0.2422, "step": 1943 }, { "epoch": 0.15518480083020675, "grad_norm": 0.3365977517369169, "learning_rate": 1.9189446550913565e-05, "loss": 0.2041, "step": 1944 }, { "epoch": 0.15526462840265026, "grad_norm": 0.33241865936528125, "learning_rate": 1.918842657316706e-05, "loss": 0.2598, "step": 1945 }, { "epoch": 0.1553444559750938, "grad_norm": 0.28345681852883375, "learning_rate": 1.9187405981210602e-05, "loss": 0.2228, "step": 1946 }, { "epoch": 0.15542428354753732, "grad_norm": 0.38344544641941, "learning_rate": 1.9186384775112416e-05, "loss": 0.2668, "step": 1947 }, { "epoch": 0.15550411111998083, "grad_norm": 0.2927437795031915, "learning_rate": 1.9185362954940757e-05, "loss": 0.2109, "step": 1948 }, { "epoch": 0.15558393869242437, "grad_norm": 0.4836148685690021, "learning_rate": 1.9184340520763935e-05, "loss": 0.2133, "step": 1949 }, { "epoch": 0.1556637662648679, "grad_norm": 0.28233787694689894, "learning_rate": 1.9183317472650296e-05, "loss": 0.2354, "step": 1950 }, { "epoch": 0.1557435938373114, "grad_norm": 0.3385993278179593, "learning_rate": 1.918229381066823e-05, "loss": 0.2461, "step": 1951 }, { "epoch": 0.15582342140975494, "grad_norm": 0.334719427721527, "learning_rate": 1.918126953488616e-05, "loss": 0.2643, "step": 1952 }, { "epoch": 0.15590324898219846, "grad_norm": 0.29332056871692, "learning_rate": 1.9180244645372553e-05, "loss": 0.3106, "step": 1953 }, { "epoch": 0.15598307655464197, "grad_norm": 0.268186611164537, "learning_rate": 1.9179219142195923e-05, "loss": 0.1992, "step": 1954 }, { "epoch": 0.15606290412708548, "grad_norm": 0.38358565677916634, "learning_rate": 1.917819302542482e-05, "loss": 0.2322, "step": 1955 }, { "epoch": 0.15614273169952902, "grad_norm": 0.270571125269581, "learning_rate": 1.9177166295127834e-05, "loss": 0.2447, "step": 1956 }, { "epoch": 0.15622255927197254, "grad_norm": 0.2612750904351766, "learning_rate": 1.9176138951373598e-05, "loss": 0.2182, "step": 1957 }, { "epoch": 0.15630238684441605, "grad_norm": 0.3731282071139798, "learning_rate": 1.9175110994230786e-05, "loss": 0.2721, "step": 1958 }, { "epoch": 0.1563822144168596, "grad_norm": 0.3357557534763451, "learning_rate": 1.9174082423768118e-05, "loss": 0.25, "step": 1959 }, { "epoch": 0.1564620419893031, "grad_norm": 0.34514080947363274, "learning_rate": 1.9173053240054344e-05, "loss": 0.2388, "step": 1960 }, { "epoch": 0.15654186956174662, "grad_norm": 0.36674636870140104, "learning_rate": 1.917202344315826e-05, "loss": 0.2216, "step": 1961 }, { "epoch": 0.15662169713419016, "grad_norm": 0.27515230725278605, "learning_rate": 1.917099303314871e-05, "loss": 0.2008, "step": 1962 }, { "epoch": 0.15670152470663368, "grad_norm": 0.373106521232635, "learning_rate": 1.916996201009457e-05, "loss": 0.2492, "step": 1963 }, { "epoch": 0.1567813522790772, "grad_norm": 0.32512124110792306, "learning_rate": 1.9168930374064756e-05, "loss": 0.2586, "step": 1964 }, { "epoch": 0.1568611798515207, "grad_norm": 0.2862312929808222, "learning_rate": 1.9167898125128235e-05, "loss": 0.2173, "step": 1965 }, { "epoch": 0.15694100742396425, "grad_norm": 0.30253675237469535, "learning_rate": 1.9166865263354e-05, "loss": 0.1979, "step": 1966 }, { "epoch": 0.15702083499640776, "grad_norm": 0.2535775579982441, "learning_rate": 1.9165831788811104e-05, "loss": 0.2387, "step": 1967 }, { "epoch": 0.15710066256885127, "grad_norm": 0.26183617810136145, "learning_rate": 1.9164797701568623e-05, "loss": 0.2247, "step": 1968 }, { "epoch": 0.15718049014129482, "grad_norm": 0.3509694903307973, "learning_rate": 1.916376300169569e-05, "loss": 0.1794, "step": 1969 }, { "epoch": 0.15726031771373833, "grad_norm": 0.3261068736086061, "learning_rate": 1.916272768926146e-05, "loss": 0.1983, "step": 1970 }, { "epoch": 0.15734014528618184, "grad_norm": 0.2626068413237752, "learning_rate": 1.9161691764335147e-05, "loss": 0.1999, "step": 1971 }, { "epoch": 0.15741997285862536, "grad_norm": 0.24197994867911166, "learning_rate": 1.9160655226985993e-05, "loss": 0.2173, "step": 1972 }, { "epoch": 0.1574998004310689, "grad_norm": 0.2611067680129045, "learning_rate": 1.9159618077283294e-05, "loss": 0.2178, "step": 1973 }, { "epoch": 0.1575796280035124, "grad_norm": 0.2591814395334958, "learning_rate": 1.9158580315296375e-05, "loss": 0.2859, "step": 1974 }, { "epoch": 0.15765945557595593, "grad_norm": 0.27259960985541215, "learning_rate": 1.9157541941094604e-05, "loss": 0.203, "step": 1975 }, { "epoch": 0.15773928314839947, "grad_norm": 0.31983875609076556, "learning_rate": 1.9156502954747394e-05, "loss": 0.2551, "step": 1976 }, { "epoch": 0.15781911072084298, "grad_norm": 0.29806739808275273, "learning_rate": 1.9155463356324198e-05, "loss": 0.2311, "step": 1977 }, { "epoch": 0.1578989382932865, "grad_norm": 0.3315967742464483, "learning_rate": 1.915442314589451e-05, "loss": 0.2248, "step": 1978 }, { "epoch": 0.15797876586573004, "grad_norm": 0.3050186871503799, "learning_rate": 1.9153382323527862e-05, "loss": 0.2223, "step": 1979 }, { "epoch": 0.15805859343817355, "grad_norm": 0.3006863779227033, "learning_rate": 1.915234088929383e-05, "loss": 0.2782, "step": 1980 }, { "epoch": 0.15813842101061706, "grad_norm": 0.2751016544094329, "learning_rate": 1.915129884326203e-05, "loss": 0.2021, "step": 1981 }, { "epoch": 0.15821824858306058, "grad_norm": 0.28410207997866616, "learning_rate": 1.9150256185502118e-05, "loss": 0.2513, "step": 1982 }, { "epoch": 0.15829807615550412, "grad_norm": 0.26424590952984134, "learning_rate": 1.9149212916083787e-05, "loss": 0.2287, "step": 1983 }, { "epoch": 0.15837790372794763, "grad_norm": 0.25735864741743675, "learning_rate": 1.9148169035076782e-05, "loss": 0.2783, "step": 1984 }, { "epoch": 0.15845773130039115, "grad_norm": 0.2527537508023705, "learning_rate": 1.9147124542550882e-05, "loss": 0.2187, "step": 1985 }, { "epoch": 0.1585375588728347, "grad_norm": 0.2638889488558529, "learning_rate": 1.9146079438575905e-05, "loss": 0.2346, "step": 1986 }, { "epoch": 0.1586173864452782, "grad_norm": 0.3588230004486453, "learning_rate": 1.9145033723221712e-05, "loss": 0.2416, "step": 1987 }, { "epoch": 0.15869721401772172, "grad_norm": 0.41375299344345473, "learning_rate": 1.9143987396558206e-05, "loss": 0.2441, "step": 1988 }, { "epoch": 0.15877704159016523, "grad_norm": 0.28276618965625894, "learning_rate": 1.914294045865533e-05, "loss": 0.246, "step": 1989 }, { "epoch": 0.15885686916260877, "grad_norm": 0.2834220909868788, "learning_rate": 1.9141892909583063e-05, "loss": 0.2626, "step": 1990 }, { "epoch": 0.15893669673505229, "grad_norm": 0.37972812941454, "learning_rate": 1.9140844749411433e-05, "loss": 0.2781, "step": 1991 }, { "epoch": 0.1590165243074958, "grad_norm": 0.2975839796733844, "learning_rate": 1.913979597821051e-05, "loss": 0.2527, "step": 1992 }, { "epoch": 0.15909635187993934, "grad_norm": 0.316749729057574, "learning_rate": 1.9138746596050396e-05, "loss": 0.2155, "step": 1993 }, { "epoch": 0.15917617945238285, "grad_norm": 0.40382995202923405, "learning_rate": 1.9137696603001234e-05, "loss": 0.2183, "step": 1994 }, { "epoch": 0.15925600702482637, "grad_norm": 0.259816481503123, "learning_rate": 1.9136645999133216e-05, "loss": 0.2382, "step": 1995 }, { "epoch": 0.1593358345972699, "grad_norm": 0.26794067301390423, "learning_rate": 1.9135594784516573e-05, "loss": 0.2697, "step": 1996 }, { "epoch": 0.15941566216971342, "grad_norm": 0.2652790546420171, "learning_rate": 1.9134542959221573e-05, "loss": 0.223, "step": 1997 }, { "epoch": 0.15949548974215694, "grad_norm": 0.2890677489418884, "learning_rate": 1.9133490523318522e-05, "loss": 0.2423, "step": 1998 }, { "epoch": 0.15957531731460045, "grad_norm": 0.33267372095130526, "learning_rate": 1.9132437476877775e-05, "loss": 0.1792, "step": 1999 }, { "epoch": 0.159655144887044, "grad_norm": 0.296351395779772, "learning_rate": 1.9131383819969728e-05, "loss": 0.2048, "step": 2000 }, { "epoch": 0.1597349724594875, "grad_norm": 0.29604098014230806, "learning_rate": 1.9130329552664805e-05, "loss": 0.2007, "step": 2001 }, { "epoch": 0.15981480003193102, "grad_norm": 0.28114335450306205, "learning_rate": 1.9129274675033484e-05, "loss": 0.2255, "step": 2002 }, { "epoch": 0.15989462760437456, "grad_norm": 0.2951455952466968, "learning_rate": 1.912821918714628e-05, "loss": 0.2235, "step": 2003 }, { "epoch": 0.15997445517681808, "grad_norm": 0.26658801637790575, "learning_rate": 1.9127163089073752e-05, "loss": 0.1734, "step": 2004 }, { "epoch": 0.1600542827492616, "grad_norm": 0.2781633586034069, "learning_rate": 1.9126106380886487e-05, "loss": 0.2382, "step": 2005 }, { "epoch": 0.1601341103217051, "grad_norm": 0.3095294209996046, "learning_rate": 1.9125049062655127e-05, "loss": 0.1771, "step": 2006 }, { "epoch": 0.16021393789414864, "grad_norm": 0.35453670071486204, "learning_rate": 1.9123991134450347e-05, "loss": 0.1972, "step": 2007 }, { "epoch": 0.16029376546659216, "grad_norm": 0.27748005927699454, "learning_rate": 1.912293259634287e-05, "loss": 0.2267, "step": 2008 }, { "epoch": 0.16037359303903567, "grad_norm": 0.3501634567046583, "learning_rate": 1.912187344840345e-05, "loss": 0.2436, "step": 2009 }, { "epoch": 0.1604534206114792, "grad_norm": 0.31200568881996743, "learning_rate": 1.9120813690702894e-05, "loss": 0.2078, "step": 2010 }, { "epoch": 0.16053324818392273, "grad_norm": 0.3302000392712492, "learning_rate": 1.9119753323312033e-05, "loss": 0.2461, "step": 2011 }, { "epoch": 0.16061307575636624, "grad_norm": 0.3457466870920359, "learning_rate": 1.911869234630175e-05, "loss": 0.2123, "step": 2012 }, { "epoch": 0.16069290332880978, "grad_norm": 0.30200708479279875, "learning_rate": 1.9117630759742977e-05, "loss": 0.2514, "step": 2013 }, { "epoch": 0.1607727309012533, "grad_norm": 0.38536541350334225, "learning_rate": 1.911656856370667e-05, "loss": 0.2251, "step": 2014 }, { "epoch": 0.1608525584736968, "grad_norm": 0.3950413936298369, "learning_rate": 1.9115505758263827e-05, "loss": 0.1739, "step": 2015 }, { "epoch": 0.16093238604614032, "grad_norm": 0.3260535379157829, "learning_rate": 1.91144423434855e-05, "loss": 0.2269, "step": 2016 }, { "epoch": 0.16101221361858387, "grad_norm": 0.3777358120512796, "learning_rate": 1.9113378319442774e-05, "loss": 0.2034, "step": 2017 }, { "epoch": 0.16109204119102738, "grad_norm": 0.3249506618399207, "learning_rate": 1.911231368620677e-05, "loss": 0.2068, "step": 2018 }, { "epoch": 0.1611718687634709, "grad_norm": 0.4067445360094655, "learning_rate": 1.911124844384866e-05, "loss": 0.1883, "step": 2019 }, { "epoch": 0.16125169633591444, "grad_norm": 0.3443588883363197, "learning_rate": 1.911018259243965e-05, "loss": 0.2537, "step": 2020 }, { "epoch": 0.16133152390835795, "grad_norm": 0.32953083236535474, "learning_rate": 1.9109116132050983e-05, "loss": 0.2528, "step": 2021 }, { "epoch": 0.16141135148080146, "grad_norm": 0.3828252520531006, "learning_rate": 1.9108049062753952e-05, "loss": 0.2177, "step": 2022 }, { "epoch": 0.161491179053245, "grad_norm": 0.260324319689098, "learning_rate": 1.9106981384619887e-05, "loss": 0.2357, "step": 2023 }, { "epoch": 0.16157100662568852, "grad_norm": 0.3128741334683922, "learning_rate": 1.9105913097720153e-05, "loss": 0.1965, "step": 2024 }, { "epoch": 0.16165083419813203, "grad_norm": 0.3613308207678467, "learning_rate": 1.910484420212617e-05, "loss": 0.2295, "step": 2025 }, { "epoch": 0.16173066177057555, "grad_norm": 0.3283893036595784, "learning_rate": 1.910377469790938e-05, "loss": 0.2632, "step": 2026 }, { "epoch": 0.1618104893430191, "grad_norm": 0.3986576552159893, "learning_rate": 1.910270458514128e-05, "loss": 0.2212, "step": 2027 }, { "epoch": 0.1618903169154626, "grad_norm": 0.32557631718456115, "learning_rate": 1.91016338638934e-05, "loss": 0.1929, "step": 2028 }, { "epoch": 0.16197014448790611, "grad_norm": 0.3592517314440519, "learning_rate": 1.9100562534237318e-05, "loss": 0.2037, "step": 2029 }, { "epoch": 0.16204997206034966, "grad_norm": 0.3186832205830453, "learning_rate": 1.9099490596244643e-05, "loss": 0.1954, "step": 2030 }, { "epoch": 0.16212979963279317, "grad_norm": 0.27129318073168185, "learning_rate": 1.9098418049987032e-05, "loss": 0.1947, "step": 2031 }, { "epoch": 0.16220962720523668, "grad_norm": 0.3749880091076863, "learning_rate": 1.9097344895536183e-05, "loss": 0.2321, "step": 2032 }, { "epoch": 0.1622894547776802, "grad_norm": 0.33651519908135713, "learning_rate": 1.909627113296383e-05, "loss": 0.2067, "step": 2033 }, { "epoch": 0.16236928235012374, "grad_norm": 0.333762320883695, "learning_rate": 1.9095196762341747e-05, "loss": 0.2419, "step": 2034 }, { "epoch": 0.16244910992256725, "grad_norm": 0.2654362388157408, "learning_rate": 1.9094121783741756e-05, "loss": 0.2792, "step": 2035 }, { "epoch": 0.16252893749501077, "grad_norm": 0.34555422593383295, "learning_rate": 1.909304619723571e-05, "loss": 0.1987, "step": 2036 }, { "epoch": 0.1626087650674543, "grad_norm": 0.28438700887094687, "learning_rate": 1.9091970002895515e-05, "loss": 0.2198, "step": 2037 }, { "epoch": 0.16268859263989782, "grad_norm": 0.2891795657317849, "learning_rate": 1.9090893200793102e-05, "loss": 0.2117, "step": 2038 }, { "epoch": 0.16276842021234134, "grad_norm": 0.36191242278780383, "learning_rate": 1.9089815791000457e-05, "loss": 0.207, "step": 2039 }, { "epoch": 0.16284824778478488, "grad_norm": 0.34298466323152704, "learning_rate": 1.9088737773589597e-05, "loss": 0.2378, "step": 2040 }, { "epoch": 0.1629280753572284, "grad_norm": 0.3048238429611321, "learning_rate": 1.908765914863259e-05, "loss": 0.2333, "step": 2041 }, { "epoch": 0.1630079029296719, "grad_norm": 0.3486025400760455, "learning_rate": 1.9086579916201526e-05, "loss": 0.2593, "step": 2042 }, { "epoch": 0.16308773050211542, "grad_norm": 0.3247120537087838, "learning_rate": 1.9085500076368557e-05, "loss": 0.2204, "step": 2043 }, { "epoch": 0.16316755807455896, "grad_norm": 0.307094721095493, "learning_rate": 1.9084419629205863e-05, "loss": 0.1795, "step": 2044 }, { "epoch": 0.16324738564700247, "grad_norm": 0.2959961186607089, "learning_rate": 1.9083338574785667e-05, "loss": 0.1784, "step": 2045 }, { "epoch": 0.163327213219446, "grad_norm": 0.271561413807684, "learning_rate": 1.908225691318024e-05, "loss": 0.24, "step": 2046 }, { "epoch": 0.16340704079188953, "grad_norm": 0.3251781712278533, "learning_rate": 1.9081174644461872e-05, "loss": 0.2014, "step": 2047 }, { "epoch": 0.16348686836433304, "grad_norm": 0.29176259382570646, "learning_rate": 1.908009176870292e-05, "loss": 0.17, "step": 2048 }, { "epoch": 0.16356669593677656, "grad_norm": 0.30237816817697566, "learning_rate": 1.9079008285975765e-05, "loss": 0.2405, "step": 2049 }, { "epoch": 0.16364652350922007, "grad_norm": 0.2791120933884575, "learning_rate": 1.907792419635284e-05, "loss": 0.2365, "step": 2050 }, { "epoch": 0.1637263510816636, "grad_norm": 0.29713237835787454, "learning_rate": 1.9076839499906604e-05, "loss": 0.2211, "step": 2051 }, { "epoch": 0.16380617865410713, "grad_norm": 0.28626750142099444, "learning_rate": 1.9075754196709574e-05, "loss": 0.2204, "step": 2052 }, { "epoch": 0.16388600622655064, "grad_norm": 0.2684570412752198, "learning_rate": 1.9074668286834285e-05, "loss": 0.1919, "step": 2053 }, { "epoch": 0.16396583379899418, "grad_norm": 0.2911309255908487, "learning_rate": 1.907358177035334e-05, "loss": 0.2123, "step": 2054 }, { "epoch": 0.1640456613714377, "grad_norm": 0.2916473191914199, "learning_rate": 1.9072494647339357e-05, "loss": 0.2232, "step": 2055 }, { "epoch": 0.1641254889438812, "grad_norm": 0.2775496283612298, "learning_rate": 1.9071406917865012e-05, "loss": 0.2477, "step": 2056 }, { "epoch": 0.16420531651632475, "grad_norm": 0.2610091812847872, "learning_rate": 1.9070318582003015e-05, "loss": 0.2081, "step": 2057 }, { "epoch": 0.16428514408876826, "grad_norm": 0.30123358512402476, "learning_rate": 1.9069229639826113e-05, "loss": 0.2037, "step": 2058 }, { "epoch": 0.16436497166121178, "grad_norm": 0.2944034381768489, "learning_rate": 1.9068140091407104e-05, "loss": 0.2371, "step": 2059 }, { "epoch": 0.1644447992336553, "grad_norm": 0.28666564438617126, "learning_rate": 1.9067049936818812e-05, "loss": 0.2495, "step": 2060 }, { "epoch": 0.16452462680609883, "grad_norm": 0.426015410485463, "learning_rate": 1.9065959176134116e-05, "loss": 0.2066, "step": 2061 }, { "epoch": 0.16460445437854235, "grad_norm": 0.36364300226606866, "learning_rate": 1.906486780942593e-05, "loss": 0.2009, "step": 2062 }, { "epoch": 0.16468428195098586, "grad_norm": 0.3320539498257779, "learning_rate": 1.90637758367672e-05, "loss": 0.2184, "step": 2063 }, { "epoch": 0.1647641095234294, "grad_norm": 0.38479834243492217, "learning_rate": 1.9062683258230926e-05, "loss": 0.2457, "step": 2064 }, { "epoch": 0.16484393709587292, "grad_norm": 0.2580920340520043, "learning_rate": 1.9061590073890143e-05, "loss": 0.2369, "step": 2065 }, { "epoch": 0.16492376466831643, "grad_norm": 0.32819777703190733, "learning_rate": 1.906049628381792e-05, "loss": 0.214, "step": 2066 }, { "epoch": 0.16500359224075997, "grad_norm": 0.3582575717503338, "learning_rate": 1.905940188808738e-05, "loss": 0.2489, "step": 2067 }, { "epoch": 0.16508341981320349, "grad_norm": 0.32612754783608555, "learning_rate": 1.9058306886771674e-05, "loss": 0.1683, "step": 2068 }, { "epoch": 0.165163247385647, "grad_norm": 0.2792743721626606, "learning_rate": 1.9057211279944e-05, "loss": 0.1974, "step": 2069 }, { "epoch": 0.1652430749580905, "grad_norm": 0.34938025623644947, "learning_rate": 1.9056115067677597e-05, "loss": 0.2635, "step": 2070 }, { "epoch": 0.16532290253053405, "grad_norm": 0.36235629715543777, "learning_rate": 1.9055018250045738e-05, "loss": 0.2401, "step": 2071 }, { "epoch": 0.16540273010297757, "grad_norm": 0.31989968039918976, "learning_rate": 1.9053920827121746e-05, "loss": 0.2099, "step": 2072 }, { "epoch": 0.16548255767542108, "grad_norm": 0.35412893137267193, "learning_rate": 1.9052822798978978e-05, "loss": 0.1879, "step": 2073 }, { "epoch": 0.16556238524786462, "grad_norm": 0.345686067847716, "learning_rate": 1.9051724165690827e-05, "loss": 0.1885, "step": 2074 }, { "epoch": 0.16564221282030814, "grad_norm": 0.33597052940969974, "learning_rate": 1.905062492733074e-05, "loss": 0.194, "step": 2075 }, { "epoch": 0.16572204039275165, "grad_norm": 0.26746184923371485, "learning_rate": 1.9049525083972193e-05, "loss": 0.2336, "step": 2076 }, { "epoch": 0.16580186796519517, "grad_norm": 0.2603509743096679, "learning_rate": 1.904842463568871e-05, "loss": 0.1933, "step": 2077 }, { "epoch": 0.1658816955376387, "grad_norm": 0.28766508090230974, "learning_rate": 1.9047323582553844e-05, "loss": 0.2215, "step": 2078 }, { "epoch": 0.16596152311008222, "grad_norm": 0.2689562588332995, "learning_rate": 1.90462219246412e-05, "loss": 0.2437, "step": 2079 }, { "epoch": 0.16604135068252573, "grad_norm": 0.25601392330314854, "learning_rate": 1.9045119662024425e-05, "loss": 0.2262, "step": 2080 }, { "epoch": 0.16612117825496928, "grad_norm": 0.4032730544697024, "learning_rate": 1.9044016794777194e-05, "loss": 0.2048, "step": 2081 }, { "epoch": 0.1662010058274128, "grad_norm": 0.34566934828406165, "learning_rate": 1.9042913322973228e-05, "loss": 0.2151, "step": 2082 }, { "epoch": 0.1662808333998563, "grad_norm": 0.28183430322266256, "learning_rate": 1.9041809246686297e-05, "loss": 0.2459, "step": 2083 }, { "epoch": 0.16636066097229985, "grad_norm": 0.30101207861310764, "learning_rate": 1.90407045659902e-05, "loss": 0.2294, "step": 2084 }, { "epoch": 0.16644048854474336, "grad_norm": 0.3169960823144776, "learning_rate": 1.903959928095878e-05, "loss": 0.217, "step": 2085 }, { "epoch": 0.16652031611718687, "grad_norm": 0.3459692997675166, "learning_rate": 1.903849339166592e-05, "loss": 0.2457, "step": 2086 }, { "epoch": 0.1666001436896304, "grad_norm": 0.38014512068966094, "learning_rate": 1.9037386898185545e-05, "loss": 0.2439, "step": 2087 }, { "epoch": 0.16667997126207393, "grad_norm": 0.3494614115540396, "learning_rate": 1.9036279800591627e-05, "loss": 0.2115, "step": 2088 }, { "epoch": 0.16675979883451744, "grad_norm": 0.35015752100398234, "learning_rate": 1.9035172098958158e-05, "loss": 0.2177, "step": 2089 }, { "epoch": 0.16683962640696096, "grad_norm": 0.3705443976999709, "learning_rate": 1.9034063793359193e-05, "loss": 0.2072, "step": 2090 }, { "epoch": 0.1669194539794045, "grad_norm": 0.3525598347528085, "learning_rate": 1.9032954883868814e-05, "loss": 0.2215, "step": 2091 }, { "epoch": 0.166999281551848, "grad_norm": 0.3117210502193806, "learning_rate": 1.903184537056115e-05, "loss": 0.1739, "step": 2092 }, { "epoch": 0.16707910912429152, "grad_norm": 0.28609070184623336, "learning_rate": 1.903073525351036e-05, "loss": 0.1663, "step": 2093 }, { "epoch": 0.16715893669673504, "grad_norm": 0.3730790193265376, "learning_rate": 1.9029624532790663e-05, "loss": 0.2172, "step": 2094 }, { "epoch": 0.16723876426917858, "grad_norm": 0.3047360201380463, "learning_rate": 1.90285132084763e-05, "loss": 0.2235, "step": 2095 }, { "epoch": 0.1673185918416221, "grad_norm": 0.2679229543138563, "learning_rate": 1.9027401280641558e-05, "loss": 0.2933, "step": 2096 }, { "epoch": 0.1673984194140656, "grad_norm": 0.3013184468444097, "learning_rate": 1.9026288749360763e-05, "loss": 0.2123, "step": 2097 }, { "epoch": 0.16747824698650915, "grad_norm": 0.3926741345814679, "learning_rate": 1.9025175614708287e-05, "loss": 0.2127, "step": 2098 }, { "epoch": 0.16755807455895266, "grad_norm": 0.27943451629715066, "learning_rate": 1.9024061876758538e-05, "loss": 0.2438, "step": 2099 }, { "epoch": 0.16763790213139618, "grad_norm": 0.32394352524449427, "learning_rate": 1.9022947535585965e-05, "loss": 0.2579, "step": 2100 }, { "epoch": 0.16771772970383972, "grad_norm": 0.3204985272742508, "learning_rate": 1.9021832591265056e-05, "loss": 0.2556, "step": 2101 }, { "epoch": 0.16779755727628323, "grad_norm": 0.3828079946790543, "learning_rate": 1.9020717043870342e-05, "loss": 0.1985, "step": 2102 }, { "epoch": 0.16787738484872675, "grad_norm": 0.3247422164981971, "learning_rate": 1.9019600893476394e-05, "loss": 0.214, "step": 2103 }, { "epoch": 0.16795721242117026, "grad_norm": 0.36416162966173593, "learning_rate": 1.901848414015782e-05, "loss": 0.2258, "step": 2104 }, { "epoch": 0.1680370399936138, "grad_norm": 0.3610412856495305, "learning_rate": 1.901736678398927e-05, "loss": 0.1855, "step": 2105 }, { "epoch": 0.16811686756605732, "grad_norm": 0.2685370608523918, "learning_rate": 1.9016248825045433e-05, "loss": 0.2192, "step": 2106 }, { "epoch": 0.16819669513850083, "grad_norm": 0.3104095301243599, "learning_rate": 1.901513026340105e-05, "loss": 0.2158, "step": 2107 }, { "epoch": 0.16827652271094437, "grad_norm": 0.4290453382521011, "learning_rate": 1.901401109913088e-05, "loss": 0.2211, "step": 2108 }, { "epoch": 0.16835635028338788, "grad_norm": 0.3308268268473135, "learning_rate": 1.9012891332309742e-05, "loss": 0.2436, "step": 2109 }, { "epoch": 0.1684361778558314, "grad_norm": 0.2829953053444314, "learning_rate": 1.9011770963012487e-05, "loss": 0.2163, "step": 2110 }, { "epoch": 0.16851600542827494, "grad_norm": 0.3531646198458929, "learning_rate": 1.9010649991314006e-05, "loss": 0.2171, "step": 2111 }, { "epoch": 0.16859583300071845, "grad_norm": 0.33588111397194315, "learning_rate": 1.9009528417289235e-05, "loss": 0.2237, "step": 2112 }, { "epoch": 0.16867566057316197, "grad_norm": 0.28050387026594603, "learning_rate": 1.9008406241013138e-05, "loss": 0.2442, "step": 2113 }, { "epoch": 0.16875548814560548, "grad_norm": 0.3066576166287548, "learning_rate": 1.900728346256074e-05, "loss": 0.2208, "step": 2114 }, { "epoch": 0.16883531571804902, "grad_norm": 0.3419230724417833, "learning_rate": 1.9006160082007082e-05, "loss": 0.2553, "step": 2115 }, { "epoch": 0.16891514329049254, "grad_norm": 0.2574036922210041, "learning_rate": 1.9005036099427265e-05, "loss": 0.2421, "step": 2116 }, { "epoch": 0.16899497086293605, "grad_norm": 0.27549852329412966, "learning_rate": 1.9003911514896424e-05, "loss": 0.2271, "step": 2117 }, { "epoch": 0.1690747984353796, "grad_norm": 0.3619479443456997, "learning_rate": 1.900278632848973e-05, "loss": 0.2057, "step": 2118 }, { "epoch": 0.1691546260078231, "grad_norm": 0.24336821888332094, "learning_rate": 1.90016605402824e-05, "loss": 0.2385, "step": 2119 }, { "epoch": 0.16923445358026662, "grad_norm": 0.2947406711783694, "learning_rate": 1.900053415034968e-05, "loss": 0.236, "step": 2120 }, { "epoch": 0.16931428115271013, "grad_norm": 0.31086805034811615, "learning_rate": 1.8999407158766875e-05, "loss": 0.2586, "step": 2121 }, { "epoch": 0.16939410872515367, "grad_norm": 0.3473430908707159, "learning_rate": 1.8998279565609317e-05, "loss": 0.2241, "step": 2122 }, { "epoch": 0.1694739362975972, "grad_norm": 0.31654251543992534, "learning_rate": 1.8997151370952375e-05, "loss": 0.2476, "step": 2123 }, { "epoch": 0.1695537638700407, "grad_norm": 0.28564065008419776, "learning_rate": 1.8996022574871476e-05, "loss": 0.238, "step": 2124 }, { "epoch": 0.16963359144248424, "grad_norm": 0.28949341926726413, "learning_rate": 1.8994893177442063e-05, "loss": 0.2342, "step": 2125 }, { "epoch": 0.16971341901492776, "grad_norm": 0.3741603516642299, "learning_rate": 1.8993763178739642e-05, "loss": 0.2443, "step": 2126 }, { "epoch": 0.16979324658737127, "grad_norm": 0.2567160473962859, "learning_rate": 1.8992632578839744e-05, "loss": 0.2494, "step": 2127 }, { "epoch": 0.1698730741598148, "grad_norm": 0.33568524029975444, "learning_rate": 1.8991501377817944e-05, "loss": 0.1818, "step": 2128 }, { "epoch": 0.16995290173225833, "grad_norm": 0.3586156880590561, "learning_rate": 1.8990369575749862e-05, "loss": 0.192, "step": 2129 }, { "epoch": 0.17003272930470184, "grad_norm": 0.2758861257172231, "learning_rate": 1.898923717271115e-05, "loss": 0.2124, "step": 2130 }, { "epoch": 0.17011255687714535, "grad_norm": 0.35597511118338687, "learning_rate": 1.8988104168777508e-05, "loss": 0.1995, "step": 2131 }, { "epoch": 0.1701923844495889, "grad_norm": 0.29673403077609317, "learning_rate": 1.898697056402467e-05, "loss": 0.1894, "step": 2132 }, { "epoch": 0.1702722120220324, "grad_norm": 0.3082858331922349, "learning_rate": 1.898583635852842e-05, "loss": 0.2338, "step": 2133 }, { "epoch": 0.17035203959447592, "grad_norm": 0.2762117185798873, "learning_rate": 1.8984701552364568e-05, "loss": 0.2474, "step": 2134 }, { "epoch": 0.17043186716691947, "grad_norm": 0.2945939818788977, "learning_rate": 1.8983566145608974e-05, "loss": 0.2304, "step": 2135 }, { "epoch": 0.17051169473936298, "grad_norm": 0.28693958181170703, "learning_rate": 1.8982430138337535e-05, "loss": 0.2725, "step": 2136 }, { "epoch": 0.1705915223118065, "grad_norm": 0.31447745524435916, "learning_rate": 1.8981293530626187e-05, "loss": 0.1949, "step": 2137 }, { "epoch": 0.17067134988425, "grad_norm": 0.2570624466498106, "learning_rate": 1.898015632255091e-05, "loss": 0.2302, "step": 2138 }, { "epoch": 0.17075117745669355, "grad_norm": 0.2716690866000396, "learning_rate": 1.897901851418772e-05, "loss": 0.2367, "step": 2139 }, { "epoch": 0.17083100502913706, "grad_norm": 0.3823240567464607, "learning_rate": 1.8977880105612678e-05, "loss": 0.206, "step": 2140 }, { "epoch": 0.17091083260158058, "grad_norm": 0.24326586515155507, "learning_rate": 1.897674109690188e-05, "loss": 0.2229, "step": 2141 }, { "epoch": 0.17099066017402412, "grad_norm": 0.3333805732912734, "learning_rate": 1.897560148813146e-05, "loss": 0.205, "step": 2142 }, { "epoch": 0.17107048774646763, "grad_norm": 0.34201619280363804, "learning_rate": 1.8974461279377608e-05, "loss": 0.189, "step": 2143 }, { "epoch": 0.17115031531891114, "grad_norm": 0.2679525505906206, "learning_rate": 1.8973320470716532e-05, "loss": 0.1989, "step": 2144 }, { "epoch": 0.1712301428913547, "grad_norm": 0.24141118606240602, "learning_rate": 1.8972179062224494e-05, "loss": 0.2477, "step": 2145 }, { "epoch": 0.1713099704637982, "grad_norm": 0.27645398597752197, "learning_rate": 1.897103705397779e-05, "loss": 0.2228, "step": 2146 }, { "epoch": 0.17138979803624171, "grad_norm": 0.30937561886686726, "learning_rate": 1.8969894446052764e-05, "loss": 0.2257, "step": 2147 }, { "epoch": 0.17146962560868523, "grad_norm": 0.34368498273312975, "learning_rate": 1.8968751238525792e-05, "loss": 0.2233, "step": 2148 }, { "epoch": 0.17154945318112877, "grad_norm": 0.3226169526170379, "learning_rate": 1.896760743147329e-05, "loss": 0.2432, "step": 2149 }, { "epoch": 0.17162928075357228, "grad_norm": 0.26572443469329016, "learning_rate": 1.8966463024971725e-05, "loss": 0.1974, "step": 2150 }, { "epoch": 0.1717091083260158, "grad_norm": 0.38496382389932565, "learning_rate": 1.8965318019097586e-05, "loss": 0.2067, "step": 2151 }, { "epoch": 0.17178893589845934, "grad_norm": 0.27110054593534044, "learning_rate": 1.896417241392742e-05, "loss": 0.2131, "step": 2152 }, { "epoch": 0.17186876347090285, "grad_norm": 0.33398874031853715, "learning_rate": 1.89630262095378e-05, "loss": 0.211, "step": 2153 }, { "epoch": 0.17194859104334637, "grad_norm": 0.34375110653831237, "learning_rate": 1.896187940600535e-05, "loss": 0.2099, "step": 2154 }, { "epoch": 0.1720284186157899, "grad_norm": 0.2890346163694368, "learning_rate": 1.8960732003406726e-05, "loss": 0.2182, "step": 2155 }, { "epoch": 0.17210824618823342, "grad_norm": 0.34014436749338783, "learning_rate": 1.895958400181863e-05, "loss": 0.2239, "step": 2156 }, { "epoch": 0.17218807376067694, "grad_norm": 0.3089735697661091, "learning_rate": 1.8958435401317804e-05, "loss": 0.2492, "step": 2157 }, { "epoch": 0.17226790133312045, "grad_norm": 0.2797300282219901, "learning_rate": 1.895728620198102e-05, "loss": 0.2568, "step": 2158 }, { "epoch": 0.172347728905564, "grad_norm": 0.33605190618527736, "learning_rate": 1.8956136403885104e-05, "loss": 0.1842, "step": 2159 }, { "epoch": 0.1724275564780075, "grad_norm": 0.4867427671354697, "learning_rate": 1.8954986007106907e-05, "loss": 0.222, "step": 2160 }, { "epoch": 0.17250738405045102, "grad_norm": 0.2969891208954846, "learning_rate": 1.895383501172334e-05, "loss": 0.2284, "step": 2161 }, { "epoch": 0.17258721162289456, "grad_norm": 0.2988981990594706, "learning_rate": 1.8952683417811333e-05, "loss": 0.2113, "step": 2162 }, { "epoch": 0.17266703919533807, "grad_norm": 0.3487084212132291, "learning_rate": 1.895153122544787e-05, "loss": 0.202, "step": 2163 }, { "epoch": 0.1727468667677816, "grad_norm": 0.27524338939511517, "learning_rate": 1.895037843470997e-05, "loss": 0.1884, "step": 2164 }, { "epoch": 0.1728266943402251, "grad_norm": 0.28353088094401413, "learning_rate": 1.894922504567469e-05, "loss": 0.2216, "step": 2165 }, { "epoch": 0.17290652191266864, "grad_norm": 0.37621931742453263, "learning_rate": 1.8948071058419137e-05, "loss": 0.2355, "step": 2166 }, { "epoch": 0.17298634948511216, "grad_norm": 0.31861577761626125, "learning_rate": 1.894691647302044e-05, "loss": 0.2016, "step": 2167 }, { "epoch": 0.17306617705755567, "grad_norm": 0.34422322040597697, "learning_rate": 1.8945761289555788e-05, "loss": 0.2165, "step": 2168 }, { "epoch": 0.1731460046299992, "grad_norm": 0.2722464946940935, "learning_rate": 1.894460550810239e-05, "loss": 0.1818, "step": 2169 }, { "epoch": 0.17322583220244273, "grad_norm": 0.35884625991250085, "learning_rate": 1.8943449128737515e-05, "loss": 0.2585, "step": 2170 }, { "epoch": 0.17330565977488624, "grad_norm": 0.3069102847517227, "learning_rate": 1.8942292151538458e-05, "loss": 0.2409, "step": 2171 }, { "epoch": 0.17338548734732978, "grad_norm": 0.29517092799808864, "learning_rate": 1.8941134576582558e-05, "loss": 0.2271, "step": 2172 }, { "epoch": 0.1734653149197733, "grad_norm": 0.33123573894309216, "learning_rate": 1.8939976403947196e-05, "loss": 0.2413, "step": 2173 }, { "epoch": 0.1735451424922168, "grad_norm": 0.3571523752469898, "learning_rate": 1.8938817633709796e-05, "loss": 0.223, "step": 2174 }, { "epoch": 0.17362497006466032, "grad_norm": 0.299695152422859, "learning_rate": 1.8937658265947805e-05, "loss": 0.2979, "step": 2175 }, { "epoch": 0.17370479763710386, "grad_norm": 0.30305333843583604, "learning_rate": 1.8936498300738728e-05, "loss": 0.2106, "step": 2176 }, { "epoch": 0.17378462520954738, "grad_norm": 0.2504112247268591, "learning_rate": 1.8935337738160112e-05, "loss": 0.2149, "step": 2177 }, { "epoch": 0.1738644527819909, "grad_norm": 0.29680544732298725, "learning_rate": 1.8934176578289522e-05, "loss": 0.2391, "step": 2178 }, { "epoch": 0.17394428035443443, "grad_norm": 0.34026691574838164, "learning_rate": 1.893301482120459e-05, "loss": 0.2358, "step": 2179 }, { "epoch": 0.17402410792687795, "grad_norm": 0.2671381488810007, "learning_rate": 1.8931852466982965e-05, "loss": 0.2062, "step": 2180 }, { "epoch": 0.17410393549932146, "grad_norm": 0.35701192598233555, "learning_rate": 1.893068951570235e-05, "loss": 0.1704, "step": 2181 }, { "epoch": 0.17418376307176497, "grad_norm": 0.2970195998927714, "learning_rate": 1.8929525967440484e-05, "loss": 0.2522, "step": 2182 }, { "epoch": 0.17426359064420852, "grad_norm": 0.3034636580757471, "learning_rate": 1.8928361822275146e-05, "loss": 0.2547, "step": 2183 }, { "epoch": 0.17434341821665203, "grad_norm": 0.25443917713103786, "learning_rate": 1.8927197080284154e-05, "loss": 0.2393, "step": 2184 }, { "epoch": 0.17442324578909554, "grad_norm": 0.25759865723113334, "learning_rate": 1.8926031741545364e-05, "loss": 0.2267, "step": 2185 }, { "epoch": 0.17450307336153908, "grad_norm": 0.40960425921248483, "learning_rate": 1.8924865806136675e-05, "loss": 0.1713, "step": 2186 }, { "epoch": 0.1745829009339826, "grad_norm": 0.357136239085855, "learning_rate": 1.892369927413603e-05, "loss": 0.1952, "step": 2187 }, { "epoch": 0.1746627285064261, "grad_norm": 0.372076127620385, "learning_rate": 1.89225321456214e-05, "loss": 0.246, "step": 2188 }, { "epoch": 0.17474255607886965, "grad_norm": 0.3079206474126164, "learning_rate": 1.892136442067081e-05, "loss": 0.1983, "step": 2189 }, { "epoch": 0.17482238365131317, "grad_norm": 0.28644428912289044, "learning_rate": 1.892019609936231e-05, "loss": 0.2343, "step": 2190 }, { "epoch": 0.17490221122375668, "grad_norm": 0.4085602029939967, "learning_rate": 1.8919027181774002e-05, "loss": 0.1989, "step": 2191 }, { "epoch": 0.1749820387962002, "grad_norm": 0.2859611524186078, "learning_rate": 1.891785766798403e-05, "loss": 0.2468, "step": 2192 }, { "epoch": 0.17506186636864374, "grad_norm": 0.3220070187540451, "learning_rate": 1.8916687558070557e-05, "loss": 0.2494, "step": 2193 }, { "epoch": 0.17514169394108725, "grad_norm": 0.28851789378300097, "learning_rate": 1.891551685211181e-05, "loss": 0.2376, "step": 2194 }, { "epoch": 0.17522152151353076, "grad_norm": 0.2915332817481982, "learning_rate": 1.8914345550186045e-05, "loss": 0.214, "step": 2195 }, { "epoch": 0.1753013490859743, "grad_norm": 0.2979445771997227, "learning_rate": 1.8913173652371555e-05, "loss": 0.2466, "step": 2196 }, { "epoch": 0.17538117665841782, "grad_norm": 0.27735685445323127, "learning_rate": 1.8912001158746684e-05, "loss": 0.2109, "step": 2197 }, { "epoch": 0.17546100423086133, "grad_norm": 0.29283759309637636, "learning_rate": 1.89108280693898e-05, "loss": 0.2085, "step": 2198 }, { "epoch": 0.17554083180330488, "grad_norm": 0.31919695127912956, "learning_rate": 1.8909654384379327e-05, "loss": 0.2208, "step": 2199 }, { "epoch": 0.1756206593757484, "grad_norm": 0.2651951016780016, "learning_rate": 1.8908480103793715e-05, "loss": 0.2259, "step": 2200 }, { "epoch": 0.1757004869481919, "grad_norm": 0.3200951368399428, "learning_rate": 1.8907305227711463e-05, "loss": 0.1711, "step": 2201 }, { "epoch": 0.17578031452063542, "grad_norm": 0.31846398875453624, "learning_rate": 1.8906129756211108e-05, "loss": 0.25, "step": 2202 }, { "epoch": 0.17586014209307896, "grad_norm": 0.361503187212138, "learning_rate": 1.8904953689371223e-05, "loss": 0.2, "step": 2203 }, { "epoch": 0.17593996966552247, "grad_norm": 0.3335069586545224, "learning_rate": 1.8903777027270426e-05, "loss": 0.1934, "step": 2204 }, { "epoch": 0.17601979723796599, "grad_norm": 0.3109915219383946, "learning_rate": 1.8902599769987367e-05, "loss": 0.2159, "step": 2205 }, { "epoch": 0.17609962481040953, "grad_norm": 0.26043313799198925, "learning_rate": 1.8901421917600745e-05, "loss": 0.2547, "step": 2206 }, { "epoch": 0.17617945238285304, "grad_norm": 0.34468669340400737, "learning_rate": 1.89002434701893e-05, "loss": 0.2436, "step": 2207 }, { "epoch": 0.17625927995529656, "grad_norm": 0.3058361288866968, "learning_rate": 1.8899064427831796e-05, "loss": 0.2232, "step": 2208 }, { "epoch": 0.17633910752774007, "grad_norm": 0.2763602633150084, "learning_rate": 1.889788479060705e-05, "loss": 0.2159, "step": 2209 }, { "epoch": 0.1764189351001836, "grad_norm": 0.37342180454089857, "learning_rate": 1.8896704558593928e-05, "loss": 0.2361, "step": 2210 }, { "epoch": 0.17649876267262712, "grad_norm": 0.31709605736298807, "learning_rate": 1.889552373187131e-05, "loss": 0.2033, "step": 2211 }, { "epoch": 0.17657859024507064, "grad_norm": 0.3432446719042788, "learning_rate": 1.889434231051813e-05, "loss": 0.1903, "step": 2212 }, { "epoch": 0.17665841781751418, "grad_norm": 0.3078224654523402, "learning_rate": 1.8893160294613368e-05, "loss": 0.2149, "step": 2213 }, { "epoch": 0.1767382453899577, "grad_norm": 0.33481001040207026, "learning_rate": 1.8891977684236032e-05, "loss": 0.225, "step": 2214 }, { "epoch": 0.1768180729624012, "grad_norm": 0.30999113206966494, "learning_rate": 1.889079447946518e-05, "loss": 0.2419, "step": 2215 }, { "epoch": 0.17689790053484475, "grad_norm": 0.3277584319662651, "learning_rate": 1.8889610680379903e-05, "loss": 0.1788, "step": 2216 }, { "epoch": 0.17697772810728826, "grad_norm": 0.3882089741753531, "learning_rate": 1.888842628705933e-05, "loss": 0.1802, "step": 2217 }, { "epoch": 0.17705755567973178, "grad_norm": 0.2711923481405646, "learning_rate": 1.888724129958263e-05, "loss": 0.2712, "step": 2218 }, { "epoch": 0.1771373832521753, "grad_norm": 0.2565701215127103, "learning_rate": 1.8886055718029025e-05, "loss": 0.2132, "step": 2219 }, { "epoch": 0.17721721082461883, "grad_norm": 0.26176468609746467, "learning_rate": 1.8884869542477763e-05, "loss": 0.2113, "step": 2220 }, { "epoch": 0.17729703839706235, "grad_norm": 0.3163449361741019, "learning_rate": 1.888368277300813e-05, "loss": 0.1497, "step": 2221 }, { "epoch": 0.17737686596950586, "grad_norm": 0.3133844522246631, "learning_rate": 1.8882495409699463e-05, "loss": 0.1939, "step": 2222 }, { "epoch": 0.1774566935419494, "grad_norm": 0.3123055544441517, "learning_rate": 1.8881307452631125e-05, "loss": 0.2308, "step": 2223 }, { "epoch": 0.17753652111439291, "grad_norm": 0.296612800826235, "learning_rate": 1.8880118901882537e-05, "loss": 0.2686, "step": 2224 }, { "epoch": 0.17761634868683643, "grad_norm": 0.25795726279279035, "learning_rate": 1.8878929757533138e-05, "loss": 0.1978, "step": 2225 }, { "epoch": 0.17769617625927994, "grad_norm": 0.263772582631107, "learning_rate": 1.8877740019662423e-05, "loss": 0.2406, "step": 2226 }, { "epoch": 0.17777600383172348, "grad_norm": 0.3388029969449033, "learning_rate": 1.8876549688349922e-05, "loss": 0.2051, "step": 2227 }, { "epoch": 0.177855831404167, "grad_norm": 0.3507889653595997, "learning_rate": 1.8875358763675202e-05, "loss": 0.21, "step": 2228 }, { "epoch": 0.1779356589766105, "grad_norm": 0.34547687010376876, "learning_rate": 1.887416724571787e-05, "loss": 0.2602, "step": 2229 }, { "epoch": 0.17801548654905405, "grad_norm": 0.4059711016105534, "learning_rate": 1.887297513455758e-05, "loss": 0.1978, "step": 2230 }, { "epoch": 0.17809531412149757, "grad_norm": 0.3125873213628975, "learning_rate": 1.8871782430274016e-05, "loss": 0.216, "step": 2231 }, { "epoch": 0.17817514169394108, "grad_norm": 0.3044117354294292, "learning_rate": 1.8870589132946905e-05, "loss": 0.2602, "step": 2232 }, { "epoch": 0.17825496926638462, "grad_norm": 0.4288071030076356, "learning_rate": 1.8869395242656012e-05, "loss": 0.2099, "step": 2233 }, { "epoch": 0.17833479683882814, "grad_norm": 0.3441116251078568, "learning_rate": 1.8868200759481152e-05, "loss": 0.215, "step": 2234 }, { "epoch": 0.17841462441127165, "grad_norm": 0.37610058992398726, "learning_rate": 1.8867005683502162e-05, "loss": 0.2538, "step": 2235 }, { "epoch": 0.17849445198371516, "grad_norm": 0.4233048876234568, "learning_rate": 1.886581001479894e-05, "loss": 0.2008, "step": 2236 }, { "epoch": 0.1785742795561587, "grad_norm": 0.32845991418177023, "learning_rate": 1.8864613753451397e-05, "loss": 0.1826, "step": 2237 }, { "epoch": 0.17865410712860222, "grad_norm": 0.326461554324876, "learning_rate": 1.8863416899539507e-05, "loss": 0.2403, "step": 2238 }, { "epoch": 0.17873393470104573, "grad_norm": 0.3957143200645702, "learning_rate": 1.8862219453143273e-05, "loss": 0.2184, "step": 2239 }, { "epoch": 0.17881376227348927, "grad_norm": 0.4155054075556784, "learning_rate": 1.8861021414342743e-05, "loss": 0.2524, "step": 2240 }, { "epoch": 0.1788935898459328, "grad_norm": 0.4649322738240463, "learning_rate": 1.8859822783217995e-05, "loss": 0.188, "step": 2241 }, { "epoch": 0.1789734174183763, "grad_norm": 0.46571399333845187, "learning_rate": 1.885862355984916e-05, "loss": 0.199, "step": 2242 }, { "epoch": 0.17905324499081982, "grad_norm": 0.3663724911447042, "learning_rate": 1.8857423744316392e-05, "loss": 0.1917, "step": 2243 }, { "epoch": 0.17913307256326336, "grad_norm": 0.3870860294584672, "learning_rate": 1.8856223336699903e-05, "loss": 0.1796, "step": 2244 }, { "epoch": 0.17921290013570687, "grad_norm": 0.38275227697027114, "learning_rate": 1.8855022337079934e-05, "loss": 0.1891, "step": 2245 }, { "epoch": 0.17929272770815038, "grad_norm": 0.33607591096527517, "learning_rate": 1.885382074553676e-05, "loss": 0.2061, "step": 2246 }, { "epoch": 0.17937255528059393, "grad_norm": 0.35357250361938747, "learning_rate": 1.8852618562150707e-05, "loss": 0.2189, "step": 2247 }, { "epoch": 0.17945238285303744, "grad_norm": 0.508095730716888, "learning_rate": 1.885141578700214e-05, "loss": 0.2672, "step": 2248 }, { "epoch": 0.17953221042548095, "grad_norm": 0.3463189986043709, "learning_rate": 1.8850212420171457e-05, "loss": 0.2017, "step": 2249 }, { "epoch": 0.1796120379979245, "grad_norm": 0.29702958641639193, "learning_rate": 1.8849008461739097e-05, "loss": 0.2461, "step": 2250 }, { "epoch": 0.179691865570368, "grad_norm": 0.3857589777292587, "learning_rate": 1.8847803911785537e-05, "loss": 0.208, "step": 2251 }, { "epoch": 0.17977169314281152, "grad_norm": 0.37400347876932444, "learning_rate": 1.8846598770391303e-05, "loss": 0.2138, "step": 2252 }, { "epoch": 0.17985152071525504, "grad_norm": 0.30073498613876537, "learning_rate": 1.8845393037636952e-05, "loss": 0.244, "step": 2253 }, { "epoch": 0.17993134828769858, "grad_norm": 0.3341088552964878, "learning_rate": 1.884418671360308e-05, "loss": 0.2003, "step": 2254 }, { "epoch": 0.1800111758601421, "grad_norm": 0.3511031755870376, "learning_rate": 1.8842979798370327e-05, "loss": 0.2364, "step": 2255 }, { "epoch": 0.1800910034325856, "grad_norm": 0.30949321483069514, "learning_rate": 1.884177229201937e-05, "loss": 0.2655, "step": 2256 }, { "epoch": 0.18017083100502915, "grad_norm": 0.27254432315010435, "learning_rate": 1.8840564194630924e-05, "loss": 0.2239, "step": 2257 }, { "epoch": 0.18025065857747266, "grad_norm": 0.3593227279386822, "learning_rate": 1.883935550628575e-05, "loss": 0.2217, "step": 2258 }, { "epoch": 0.18033048614991617, "grad_norm": 0.31485739211647507, "learning_rate": 1.8838146227064638e-05, "loss": 0.2858, "step": 2259 }, { "epoch": 0.18041031372235972, "grad_norm": 0.3255829567526781, "learning_rate": 1.883693635704843e-05, "loss": 0.2024, "step": 2260 }, { "epoch": 0.18049014129480323, "grad_norm": 0.30940175656285857, "learning_rate": 1.8835725896317995e-05, "loss": 0.1797, "step": 2261 }, { "epoch": 0.18056996886724674, "grad_norm": 0.41795384282968084, "learning_rate": 1.8834514844954256e-05, "loss": 0.2077, "step": 2262 }, { "epoch": 0.18064979643969026, "grad_norm": 0.31758682391525134, "learning_rate": 1.8833303203038154e-05, "loss": 0.1992, "step": 2263 }, { "epoch": 0.1807296240121338, "grad_norm": 0.351780515704211, "learning_rate": 1.88320909706507e-05, "loss": 0.2112, "step": 2264 }, { "epoch": 0.1808094515845773, "grad_norm": 0.3053627296701428, "learning_rate": 1.8830878147872907e-05, "loss": 0.2303, "step": 2265 }, { "epoch": 0.18088927915702083, "grad_norm": 0.29869612251493216, "learning_rate": 1.8829664734785864e-05, "loss": 0.2397, "step": 2266 }, { "epoch": 0.18096910672946437, "grad_norm": 0.27217211384673196, "learning_rate": 1.8828450731470675e-05, "loss": 0.2503, "step": 2267 }, { "epoch": 0.18104893430190788, "grad_norm": 0.3047951499569459, "learning_rate": 1.8827236138008488e-05, "loss": 0.2509, "step": 2268 }, { "epoch": 0.1811287618743514, "grad_norm": 0.2849841236786335, "learning_rate": 1.8826020954480503e-05, "loss": 0.233, "step": 2269 }, { "epoch": 0.1812085894467949, "grad_norm": 0.3288673147006632, "learning_rate": 1.8824805180967942e-05, "loss": 0.2133, "step": 2270 }, { "epoch": 0.18128841701923845, "grad_norm": 0.28603597741211173, "learning_rate": 1.8823588817552082e-05, "loss": 0.2431, "step": 2271 }, { "epoch": 0.18136824459168197, "grad_norm": 0.28491679101594125, "learning_rate": 1.8822371864314224e-05, "loss": 0.2621, "step": 2272 }, { "epoch": 0.18144807216412548, "grad_norm": 0.3131152643410331, "learning_rate": 1.8821154321335724e-05, "loss": 0.223, "step": 2273 }, { "epoch": 0.18152789973656902, "grad_norm": 0.28171438835753015, "learning_rate": 1.8819936188697966e-05, "loss": 0.2188, "step": 2274 }, { "epoch": 0.18160772730901253, "grad_norm": 0.31405909669018184, "learning_rate": 1.881871746648238e-05, "loss": 0.2629, "step": 2275 }, { "epoch": 0.18168755488145605, "grad_norm": 0.2735579081765555, "learning_rate": 1.8817498154770424e-05, "loss": 0.1677, "step": 2276 }, { "epoch": 0.1817673824538996, "grad_norm": 0.2923832810606024, "learning_rate": 1.8816278253643618e-05, "loss": 0.2344, "step": 2277 }, { "epoch": 0.1818472100263431, "grad_norm": 0.31747664473938103, "learning_rate": 1.8815057763183497e-05, "loss": 0.2081, "step": 2278 }, { "epoch": 0.18192703759878662, "grad_norm": 0.3066780027187838, "learning_rate": 1.881383668347165e-05, "loss": 0.247, "step": 2279 }, { "epoch": 0.18200686517123013, "grad_norm": 0.28338509330238515, "learning_rate": 1.88126150145897e-05, "loss": 0.1802, "step": 2280 }, { "epoch": 0.18208669274367367, "grad_norm": 0.30188348527671244, "learning_rate": 1.8811392756619314e-05, "loss": 0.2409, "step": 2281 }, { "epoch": 0.1821665203161172, "grad_norm": 0.26736920640118855, "learning_rate": 1.881016990964219e-05, "loss": 0.2034, "step": 2282 }, { "epoch": 0.1822463478885607, "grad_norm": 0.32599279598343694, "learning_rate": 1.8808946473740074e-05, "loss": 0.2504, "step": 2283 }, { "epoch": 0.18232617546100424, "grad_norm": 0.27407127399369513, "learning_rate": 1.880772244899475e-05, "loss": 0.1737, "step": 2284 }, { "epoch": 0.18240600303344776, "grad_norm": 0.3059303117879453, "learning_rate": 1.8806497835488033e-05, "loss": 0.2221, "step": 2285 }, { "epoch": 0.18248583060589127, "grad_norm": 0.3423319472372541, "learning_rate": 1.8805272633301785e-05, "loss": 0.1995, "step": 2286 }, { "epoch": 0.18256565817833478, "grad_norm": 0.29721159608410214, "learning_rate": 1.880404684251791e-05, "loss": 0.2188, "step": 2287 }, { "epoch": 0.18264548575077832, "grad_norm": 0.3436166338881929, "learning_rate": 1.8802820463218345e-05, "loss": 0.1859, "step": 2288 }, { "epoch": 0.18272531332322184, "grad_norm": 0.3205029164002339, "learning_rate": 1.880159349548507e-05, "loss": 0.2161, "step": 2289 }, { "epoch": 0.18280514089566535, "grad_norm": 0.48004686502655197, "learning_rate": 1.88003659394001e-05, "loss": 0.2325, "step": 2290 }, { "epoch": 0.1828849684681089, "grad_norm": 0.27687291124834174, "learning_rate": 1.8799137795045493e-05, "loss": 0.222, "step": 2291 }, { "epoch": 0.1829647960405524, "grad_norm": 0.28842986941433996, "learning_rate": 1.8797909062503343e-05, "loss": 0.2095, "step": 2292 }, { "epoch": 0.18304462361299592, "grad_norm": 0.3598424428321524, "learning_rate": 1.8796679741855794e-05, "loss": 0.2145, "step": 2293 }, { "epoch": 0.18312445118543946, "grad_norm": 0.5116352794118544, "learning_rate": 1.8795449833185016e-05, "loss": 0.2332, "step": 2294 }, { "epoch": 0.18320427875788298, "grad_norm": 0.26493048242701356, "learning_rate": 1.8794219336573223e-05, "loss": 0.1974, "step": 2295 }, { "epoch": 0.1832841063303265, "grad_norm": 0.3883368643037468, "learning_rate": 1.879298825210267e-05, "loss": 0.2109, "step": 2296 }, { "epoch": 0.18336393390277, "grad_norm": 0.4565539345582492, "learning_rate": 1.879175657985565e-05, "loss": 0.2611, "step": 2297 }, { "epoch": 0.18344376147521355, "grad_norm": 0.3673040242819984, "learning_rate": 1.8790524319914496e-05, "loss": 0.2221, "step": 2298 }, { "epoch": 0.18352358904765706, "grad_norm": 0.3264901839036125, "learning_rate": 1.8789291472361576e-05, "loss": 0.2692, "step": 2299 }, { "epoch": 0.18360341662010057, "grad_norm": 0.36988827241338607, "learning_rate": 1.8788058037279308e-05, "loss": 0.2397, "step": 2300 }, { "epoch": 0.18368324419254412, "grad_norm": 0.3399405197265598, "learning_rate": 1.8786824014750137e-05, "loss": 0.2165, "step": 2301 }, { "epoch": 0.18376307176498763, "grad_norm": 0.36973408540420805, "learning_rate": 1.8785589404856553e-05, "loss": 0.2368, "step": 2302 }, { "epoch": 0.18384289933743114, "grad_norm": 0.3514400564448958, "learning_rate": 1.8784354207681084e-05, "loss": 0.1868, "step": 2303 }, { "epoch": 0.18392272690987468, "grad_norm": 0.30198669277325985, "learning_rate": 1.8783118423306303e-05, "loss": 0.2238, "step": 2304 }, { "epoch": 0.1840025544823182, "grad_norm": 0.30739314476037943, "learning_rate": 1.8781882051814813e-05, "loss": 0.1927, "step": 2305 }, { "epoch": 0.1840823820547617, "grad_norm": 0.3591292418766352, "learning_rate": 1.8780645093289262e-05, "loss": 0.2474, "step": 2306 }, { "epoch": 0.18416220962720523, "grad_norm": 0.29284496102473173, "learning_rate": 1.8779407547812332e-05, "loss": 0.1351, "step": 2307 }, { "epoch": 0.18424203719964877, "grad_norm": 0.3624212849992769, "learning_rate": 1.8778169415466755e-05, "loss": 0.2565, "step": 2308 }, { "epoch": 0.18432186477209228, "grad_norm": 0.3067257245657464, "learning_rate": 1.8776930696335288e-05, "loss": 0.1934, "step": 2309 }, { "epoch": 0.1844016923445358, "grad_norm": 0.37295515617961467, "learning_rate": 1.877569139050074e-05, "loss": 0.1578, "step": 2310 }, { "epoch": 0.18448151991697934, "grad_norm": 0.37197761844338767, "learning_rate": 1.8774451498045955e-05, "loss": 0.1864, "step": 2311 }, { "epoch": 0.18456134748942285, "grad_norm": 0.294802012490247, "learning_rate": 1.8773211019053805e-05, "loss": 0.1928, "step": 2312 }, { "epoch": 0.18464117506186636, "grad_norm": 0.37157524870985675, "learning_rate": 1.877196995360722e-05, "loss": 0.2172, "step": 2313 }, { "epoch": 0.18472100263430988, "grad_norm": 0.33545861019202533, "learning_rate": 1.8770728301789162e-05, "loss": 0.2134, "step": 2314 }, { "epoch": 0.18480083020675342, "grad_norm": 0.3148225720369717, "learning_rate": 1.876948606368262e-05, "loss": 0.2656, "step": 2315 }, { "epoch": 0.18488065777919693, "grad_norm": 0.3222734473204391, "learning_rate": 1.8768243239370646e-05, "loss": 0.2154, "step": 2316 }, { "epoch": 0.18496048535164045, "grad_norm": 0.3051891546893886, "learning_rate": 1.8766999828936304e-05, "loss": 0.2771, "step": 2317 }, { "epoch": 0.185040312924084, "grad_norm": 0.3814015492032912, "learning_rate": 1.8765755832462723e-05, "loss": 0.2064, "step": 2318 }, { "epoch": 0.1851201404965275, "grad_norm": 0.3523063231262817, "learning_rate": 1.8764511250033053e-05, "loss": 0.2656, "step": 2319 }, { "epoch": 0.18519996806897102, "grad_norm": 0.28749507160557514, "learning_rate": 1.876326608173049e-05, "loss": 0.2348, "step": 2320 }, { "epoch": 0.18527979564141456, "grad_norm": 0.3126296213313737, "learning_rate": 1.8762020327638272e-05, "loss": 0.2424, "step": 2321 }, { "epoch": 0.18535962321385807, "grad_norm": 0.33854206173201556, "learning_rate": 1.876077398783967e-05, "loss": 0.2209, "step": 2322 }, { "epoch": 0.18543945078630159, "grad_norm": 0.3275247527157359, "learning_rate": 1.8759527062417993e-05, "loss": 0.2242, "step": 2323 }, { "epoch": 0.1855192783587451, "grad_norm": 0.35771322443221065, "learning_rate": 1.87582795514566e-05, "loss": 0.212, "step": 2324 }, { "epoch": 0.18559910593118864, "grad_norm": 0.35194942201935836, "learning_rate": 1.8757031455038878e-05, "loss": 0.2047, "step": 2325 }, { "epoch": 0.18567893350363215, "grad_norm": 0.3545259126913327, "learning_rate": 1.8755782773248258e-05, "loss": 0.2134, "step": 2326 }, { "epoch": 0.18575876107607567, "grad_norm": 0.36721042064899967, "learning_rate": 1.8754533506168212e-05, "loss": 0.2298, "step": 2327 }, { "epoch": 0.1858385886485192, "grad_norm": 0.3049798112696226, "learning_rate": 1.8753283653882245e-05, "loss": 0.2063, "step": 2328 }, { "epoch": 0.18591841622096272, "grad_norm": 0.3019980267754061, "learning_rate": 1.8752033216473906e-05, "loss": 0.2285, "step": 2329 }, { "epoch": 0.18599824379340624, "grad_norm": 0.4572484953095064, "learning_rate": 1.8750782194026785e-05, "loss": 0.2113, "step": 2330 }, { "epoch": 0.18607807136584975, "grad_norm": 0.3188356732593867, "learning_rate": 1.8749530586624502e-05, "loss": 0.2128, "step": 2331 }, { "epoch": 0.1861578989382933, "grad_norm": 0.3623382341733094, "learning_rate": 1.8748278394350725e-05, "loss": 0.2225, "step": 2332 }, { "epoch": 0.1862377265107368, "grad_norm": 0.39971378822262654, "learning_rate": 1.874702561728916e-05, "loss": 0.2155, "step": 2333 }, { "epoch": 0.18631755408318032, "grad_norm": 0.2973779074273901, "learning_rate": 1.8745772255523548e-05, "loss": 0.2837, "step": 2334 }, { "epoch": 0.18639738165562386, "grad_norm": 0.34336756097516447, "learning_rate": 1.8744518309137667e-05, "loss": 0.1901, "step": 2335 }, { "epoch": 0.18647720922806738, "grad_norm": 0.45475706335382093, "learning_rate": 1.8743263778215345e-05, "loss": 0.218, "step": 2336 }, { "epoch": 0.1865570368005109, "grad_norm": 0.3140285470533205, "learning_rate": 1.8742008662840442e-05, "loss": 0.2337, "step": 2337 }, { "epoch": 0.18663686437295443, "grad_norm": 0.31774795146537366, "learning_rate": 1.8740752963096855e-05, "loss": 0.2295, "step": 2338 }, { "epoch": 0.18671669194539794, "grad_norm": 0.3701142085539788, "learning_rate": 1.8739496679068524e-05, "loss": 0.2467, "step": 2339 }, { "epoch": 0.18679651951784146, "grad_norm": 0.3920664388465227, "learning_rate": 1.8738239810839426e-05, "loss": 0.2339, "step": 2340 }, { "epoch": 0.18687634709028497, "grad_norm": 0.3323981812205492, "learning_rate": 1.8736982358493574e-05, "loss": 0.2075, "step": 2341 }, { "epoch": 0.1869561746627285, "grad_norm": 0.3604199921871445, "learning_rate": 1.873572432211503e-05, "loss": 0.1911, "step": 2342 }, { "epoch": 0.18703600223517203, "grad_norm": 0.326098705483204, "learning_rate": 1.8734465701787884e-05, "loss": 0.2157, "step": 2343 }, { "epoch": 0.18711582980761554, "grad_norm": 0.2738342220255555, "learning_rate": 1.8733206497596276e-05, "loss": 0.2474, "step": 2344 }, { "epoch": 0.18719565738005908, "grad_norm": 0.38121055965348605, "learning_rate": 1.873194670962437e-05, "loss": 0.222, "step": 2345 }, { "epoch": 0.1872754849525026, "grad_norm": 0.3595238738435183, "learning_rate": 1.8730686337956387e-05, "loss": 0.2331, "step": 2346 }, { "epoch": 0.1873553125249461, "grad_norm": 0.31721514181685945, "learning_rate": 1.872942538267657e-05, "loss": 0.2402, "step": 2347 }, { "epoch": 0.18743514009738965, "grad_norm": 0.35037691746463295, "learning_rate": 1.8728163843869215e-05, "loss": 0.1804, "step": 2348 }, { "epoch": 0.18751496766983317, "grad_norm": 0.38073988766253225, "learning_rate": 1.8726901721618645e-05, "loss": 0.239, "step": 2349 }, { "epoch": 0.18759479524227668, "grad_norm": 0.34976058468356513, "learning_rate": 1.872563901600923e-05, "loss": 0.207, "step": 2350 }, { "epoch": 0.1876746228147202, "grad_norm": 0.31328407011834475, "learning_rate": 1.8724375727125382e-05, "loss": 0.1934, "step": 2351 }, { "epoch": 0.18775445038716373, "grad_norm": 0.29995870628824584, "learning_rate": 1.8723111855051538e-05, "loss": 0.2458, "step": 2352 }, { "epoch": 0.18783427795960725, "grad_norm": 0.35124032108492903, "learning_rate": 1.8721847399872192e-05, "loss": 0.2327, "step": 2353 }, { "epoch": 0.18791410553205076, "grad_norm": 0.3201733240905506, "learning_rate": 1.872058236167186e-05, "loss": 0.2031, "step": 2354 }, { "epoch": 0.1879939331044943, "grad_norm": 0.2828514692901268, "learning_rate": 1.871931674053511e-05, "loss": 0.1788, "step": 2355 }, { "epoch": 0.18807376067693782, "grad_norm": 0.3298124723092578, "learning_rate": 1.8718050536546545e-05, "loss": 0.2294, "step": 2356 }, { "epoch": 0.18815358824938133, "grad_norm": 0.3202656249337682, "learning_rate": 1.8716783749790797e-05, "loss": 0.1801, "step": 2357 }, { "epoch": 0.18823341582182485, "grad_norm": 0.33610416375635604, "learning_rate": 1.8715516380352553e-05, "loss": 0.2221, "step": 2358 }, { "epoch": 0.1883132433942684, "grad_norm": 0.292185613955724, "learning_rate": 1.871424842831653e-05, "loss": 0.1873, "step": 2359 }, { "epoch": 0.1883930709667119, "grad_norm": 0.3431889413704792, "learning_rate": 1.8712979893767486e-05, "loss": 0.214, "step": 2360 }, { "epoch": 0.18847289853915541, "grad_norm": 0.36699773955961873, "learning_rate": 1.8711710776790216e-05, "loss": 0.2058, "step": 2361 }, { "epoch": 0.18855272611159896, "grad_norm": 0.2871197779422498, "learning_rate": 1.8710441077469557e-05, "loss": 0.1919, "step": 2362 }, { "epoch": 0.18863255368404247, "grad_norm": 0.3074810160918572, "learning_rate": 1.8709170795890387e-05, "loss": 0.2063, "step": 2363 }, { "epoch": 0.18871238125648598, "grad_norm": 0.2676830822676812, "learning_rate": 1.870789993213761e-05, "loss": 0.1941, "step": 2364 }, { "epoch": 0.18879220882892953, "grad_norm": 0.27586546726517114, "learning_rate": 1.8706628486296185e-05, "loss": 0.245, "step": 2365 }, { "epoch": 0.18887203640137304, "grad_norm": 0.3052867298649092, "learning_rate": 1.87053564584511e-05, "loss": 0.2405, "step": 2366 }, { "epoch": 0.18895186397381655, "grad_norm": 0.2808711712896027, "learning_rate": 1.870408384868739e-05, "loss": 0.1964, "step": 2367 }, { "epoch": 0.18903169154626007, "grad_norm": 0.3447317701525469, "learning_rate": 1.870281065709012e-05, "loss": 0.1869, "step": 2368 }, { "epoch": 0.1891115191187036, "grad_norm": 0.2817455186730754, "learning_rate": 1.8701536883744394e-05, "loss": 0.1952, "step": 2369 }, { "epoch": 0.18919134669114712, "grad_norm": 0.3178598472764508, "learning_rate": 1.8700262528735368e-05, "loss": 0.1781, "step": 2370 }, { "epoch": 0.18927117426359064, "grad_norm": 0.3894689548460417, "learning_rate": 1.869898759214822e-05, "loss": 0.1801, "step": 2371 }, { "epoch": 0.18935100183603418, "grad_norm": 0.3091515678944948, "learning_rate": 1.8697712074068174e-05, "loss": 0.1655, "step": 2372 }, { "epoch": 0.1894308294084777, "grad_norm": 0.3031814103886908, "learning_rate": 1.8696435974580502e-05, "loss": 0.2104, "step": 2373 }, { "epoch": 0.1895106569809212, "grad_norm": 0.32447852534593125, "learning_rate": 1.8695159293770496e-05, "loss": 0.2124, "step": 2374 }, { "epoch": 0.18959048455336472, "grad_norm": 0.31316592305100727, "learning_rate": 1.8693882031723506e-05, "loss": 0.2079, "step": 2375 }, { "epoch": 0.18967031212580826, "grad_norm": 0.3411613881867513, "learning_rate": 1.8692604188524905e-05, "loss": 0.2241, "step": 2376 }, { "epoch": 0.18975013969825177, "grad_norm": 0.30085098348705264, "learning_rate": 1.8691325764260112e-05, "loss": 0.2028, "step": 2377 }, { "epoch": 0.1898299672706953, "grad_norm": 0.2693979312188016, "learning_rate": 1.869004675901459e-05, "loss": 0.221, "step": 2378 }, { "epoch": 0.18990979484313883, "grad_norm": 0.27721029512045786, "learning_rate": 1.868876717287383e-05, "loss": 0.2231, "step": 2379 }, { "epoch": 0.18998962241558234, "grad_norm": 0.2740660361653378, "learning_rate": 1.8687487005923374e-05, "loss": 0.1916, "step": 2380 }, { "epoch": 0.19006944998802586, "grad_norm": 0.28920127389381806, "learning_rate": 1.8686206258248793e-05, "loss": 0.2011, "step": 2381 }, { "epoch": 0.1901492775604694, "grad_norm": 0.26913890279944613, "learning_rate": 1.868492492993569e-05, "loss": 0.1796, "step": 2382 }, { "epoch": 0.1902291051329129, "grad_norm": 0.30680038564112105, "learning_rate": 1.868364302106973e-05, "loss": 0.2055, "step": 2383 }, { "epoch": 0.19030893270535643, "grad_norm": 0.3362884901639376, "learning_rate": 1.86823605317366e-05, "loss": 0.2746, "step": 2384 }, { "epoch": 0.19038876027779994, "grad_norm": 0.3010869575179624, "learning_rate": 1.868107746202203e-05, "loss": 0.2183, "step": 2385 }, { "epoch": 0.19046858785024348, "grad_norm": 0.3198418317713819, "learning_rate": 1.867979381201178e-05, "loss": 0.2289, "step": 2386 }, { "epoch": 0.190548415422687, "grad_norm": 0.333068806093837, "learning_rate": 1.867850958179167e-05, "loss": 0.2771, "step": 2387 }, { "epoch": 0.1906282429951305, "grad_norm": 0.2922395404743253, "learning_rate": 1.8677224771447536e-05, "loss": 0.2298, "step": 2388 }, { "epoch": 0.19070807056757405, "grad_norm": 0.2659895928251155, "learning_rate": 1.867593938106526e-05, "loss": 0.238, "step": 2389 }, { "epoch": 0.19078789814001756, "grad_norm": 0.44761862722300744, "learning_rate": 1.867465341073078e-05, "loss": 0.2138, "step": 2390 }, { "epoch": 0.19086772571246108, "grad_norm": 0.2802721678839336, "learning_rate": 1.8673366860530048e-05, "loss": 0.1939, "step": 2391 }, { "epoch": 0.19094755328490462, "grad_norm": 0.28804151271682576, "learning_rate": 1.867207973054906e-05, "loss": 0.1981, "step": 2392 }, { "epoch": 0.19102738085734813, "grad_norm": 0.30313403327100996, "learning_rate": 1.8670792020873862e-05, "loss": 0.193, "step": 2393 }, { "epoch": 0.19110720842979165, "grad_norm": 0.2968116963653394, "learning_rate": 1.8669503731590532e-05, "loss": 0.2367, "step": 2394 }, { "epoch": 0.19118703600223516, "grad_norm": 0.3252260110625648, "learning_rate": 1.866821486278519e-05, "loss": 0.2306, "step": 2395 }, { "epoch": 0.1912668635746787, "grad_norm": 0.3263915780065261, "learning_rate": 1.8666925414543983e-05, "loss": 0.2271, "step": 2396 }, { "epoch": 0.19134669114712222, "grad_norm": 0.29939865004622357, "learning_rate": 1.8665635386953113e-05, "loss": 0.2241, "step": 2397 }, { "epoch": 0.19142651871956573, "grad_norm": 0.44538329344725475, "learning_rate": 1.8664344780098812e-05, "loss": 0.2109, "step": 2398 }, { "epoch": 0.19150634629200927, "grad_norm": 0.3706649568875654, "learning_rate": 1.8663053594067353e-05, "loss": 0.2113, "step": 2399 }, { "epoch": 0.19158617386445279, "grad_norm": 0.276037067810996, "learning_rate": 1.866176182894504e-05, "loss": 0.2088, "step": 2400 }, { "epoch": 0.1916660014368963, "grad_norm": 0.40402194955449044, "learning_rate": 1.8660469484818227e-05, "loss": 0.2185, "step": 2401 }, { "epoch": 0.1917458290093398, "grad_norm": 0.2817814003893787, "learning_rate": 1.8659176561773305e-05, "loss": 0.2217, "step": 2402 }, { "epoch": 0.19182565658178335, "grad_norm": 0.3377704104354422, "learning_rate": 1.8657883059896698e-05, "loss": 0.1569, "step": 2403 }, { "epoch": 0.19190548415422687, "grad_norm": 0.33224989726539905, "learning_rate": 1.865658897927487e-05, "loss": 0.1835, "step": 2404 }, { "epoch": 0.19198531172667038, "grad_norm": 0.393585059476989, "learning_rate": 1.8655294319994327e-05, "loss": 0.2315, "step": 2405 }, { "epoch": 0.19206513929911392, "grad_norm": 0.32611051245207706, "learning_rate": 1.8653999082141608e-05, "loss": 0.2282, "step": 2406 }, { "epoch": 0.19214496687155744, "grad_norm": 0.3348651931005973, "learning_rate": 1.8652703265803303e-05, "loss": 0.2137, "step": 2407 }, { "epoch": 0.19222479444400095, "grad_norm": 0.3560781246366631, "learning_rate": 1.8651406871066025e-05, "loss": 0.2071, "step": 2408 }, { "epoch": 0.1923046220164445, "grad_norm": 0.3618354685627284, "learning_rate": 1.8650109898016434e-05, "loss": 0.1826, "step": 2409 }, { "epoch": 0.192384449588888, "grad_norm": 0.28153231325119443, "learning_rate": 1.864881234674123e-05, "loss": 0.2294, "step": 2410 }, { "epoch": 0.19246427716133152, "grad_norm": 0.5633153639365212, "learning_rate": 1.864751421732715e-05, "loss": 0.2315, "step": 2411 }, { "epoch": 0.19254410473377503, "grad_norm": 0.3538431220119504, "learning_rate": 1.864621550986096e-05, "loss": 0.1973, "step": 2412 }, { "epoch": 0.19262393230621858, "grad_norm": 0.43420191783441, "learning_rate": 1.8644916224429483e-05, "loss": 0.2161, "step": 2413 }, { "epoch": 0.1927037598786621, "grad_norm": 0.3954969525097655, "learning_rate": 1.8643616361119567e-05, "loss": 0.1963, "step": 2414 }, { "epoch": 0.1927835874511056, "grad_norm": 0.3793954946070599, "learning_rate": 1.8642315920018107e-05, "loss": 0.2045, "step": 2415 }, { "epoch": 0.19286341502354915, "grad_norm": 0.4150686058756423, "learning_rate": 1.8641014901212026e-05, "loss": 0.2372, "step": 2416 }, { "epoch": 0.19294324259599266, "grad_norm": 0.40506578591000825, "learning_rate": 1.8639713304788295e-05, "loss": 0.1754, "step": 2417 }, { "epoch": 0.19302307016843617, "grad_norm": 0.44332012579035734, "learning_rate": 1.8638411130833923e-05, "loss": 0.2239, "step": 2418 }, { "epoch": 0.1931028977408797, "grad_norm": 0.3464413988478698, "learning_rate": 1.8637108379435947e-05, "loss": 0.2155, "step": 2419 }, { "epoch": 0.19318272531332323, "grad_norm": 0.28352780968970226, "learning_rate": 1.8635805050681463e-05, "loss": 0.2136, "step": 2420 }, { "epoch": 0.19326255288576674, "grad_norm": 0.35583255336531416, "learning_rate": 1.8634501144657586e-05, "loss": 0.2206, "step": 2421 }, { "epoch": 0.19334238045821026, "grad_norm": 0.412189629151178, "learning_rate": 1.8633196661451476e-05, "loss": 0.2289, "step": 2422 }, { "epoch": 0.1934222080306538, "grad_norm": 0.3282470845245302, "learning_rate": 1.8631891601150337e-05, "loss": 0.189, "step": 2423 }, { "epoch": 0.1935020356030973, "grad_norm": 0.34929346718606075, "learning_rate": 1.8630585963841403e-05, "loss": 0.2236, "step": 2424 }, { "epoch": 0.19358186317554082, "grad_norm": 0.32171477415712346, "learning_rate": 1.862927974961195e-05, "loss": 0.1692, "step": 2425 }, { "epoch": 0.19366169074798437, "grad_norm": 0.29358946410242287, "learning_rate": 1.8627972958549296e-05, "loss": 0.1914, "step": 2426 }, { "epoch": 0.19374151832042788, "grad_norm": 0.33107148834202843, "learning_rate": 1.8626665590740794e-05, "loss": 0.1734, "step": 2427 }, { "epoch": 0.1938213458928714, "grad_norm": 0.4561402150344269, "learning_rate": 1.8625357646273842e-05, "loss": 0.1952, "step": 2428 }, { "epoch": 0.1939011734653149, "grad_norm": 0.4136190686826168, "learning_rate": 1.8624049125235858e-05, "loss": 0.1925, "step": 2429 }, { "epoch": 0.19398100103775845, "grad_norm": 0.30385405487213696, "learning_rate": 1.8622740027714325e-05, "loss": 0.2381, "step": 2430 }, { "epoch": 0.19406082861020196, "grad_norm": 0.28576365429317263, "learning_rate": 1.862143035379674e-05, "loss": 0.193, "step": 2431 }, { "epoch": 0.19414065618264548, "grad_norm": 0.42763960898386355, "learning_rate": 1.8620120103570655e-05, "loss": 0.2297, "step": 2432 }, { "epoch": 0.19422048375508902, "grad_norm": 0.2895358035568844, "learning_rate": 1.861880927712366e-05, "loss": 0.2185, "step": 2433 }, { "epoch": 0.19430031132753253, "grad_norm": 0.42277843101641527, "learning_rate": 1.8617497874543365e-05, "loss": 0.2198, "step": 2434 }, { "epoch": 0.19438013889997605, "grad_norm": 0.4150606433598918, "learning_rate": 1.861618589591745e-05, "loss": 0.2037, "step": 2435 }, { "epoch": 0.1944599664724196, "grad_norm": 0.3653962008351622, "learning_rate": 1.8614873341333596e-05, "loss": 0.2008, "step": 2436 }, { "epoch": 0.1945397940448631, "grad_norm": 0.2889742230561288, "learning_rate": 1.8613560210879557e-05, "loss": 0.1776, "step": 2437 }, { "epoch": 0.19461962161730662, "grad_norm": 0.3592570343525408, "learning_rate": 1.8612246504643104e-05, "loss": 0.2611, "step": 2438 }, { "epoch": 0.19469944918975013, "grad_norm": 0.3653631096618996, "learning_rate": 1.8610932222712056e-05, "loss": 0.2087, "step": 2439 }, { "epoch": 0.19477927676219367, "grad_norm": 0.2825703477717031, "learning_rate": 1.8609617365174266e-05, "loss": 0.2319, "step": 2440 }, { "epoch": 0.19485910433463718, "grad_norm": 0.3043348432627681, "learning_rate": 1.8608301932117626e-05, "loss": 0.2497, "step": 2441 }, { "epoch": 0.1949389319070807, "grad_norm": 0.39030763235922616, "learning_rate": 1.860698592363007e-05, "loss": 0.2103, "step": 2442 }, { "epoch": 0.19501875947952424, "grad_norm": 0.3048170994051087, "learning_rate": 1.8605669339799565e-05, "loss": 0.1795, "step": 2443 }, { "epoch": 0.19509858705196775, "grad_norm": 0.31808929580440976, "learning_rate": 1.860435218071412e-05, "loss": 0.2082, "step": 2444 }, { "epoch": 0.19517841462441127, "grad_norm": 0.33754537741385465, "learning_rate": 1.8603034446461784e-05, "loss": 0.1907, "step": 2445 }, { "epoch": 0.19525824219685478, "grad_norm": 0.3077625688332168, "learning_rate": 1.8601716137130642e-05, "loss": 0.2917, "step": 2446 }, { "epoch": 0.19533806976929832, "grad_norm": 0.3084129948375772, "learning_rate": 1.8600397252808815e-05, "loss": 0.2278, "step": 2447 }, { "epoch": 0.19541789734174184, "grad_norm": 0.27798233564520664, "learning_rate": 1.8599077793584467e-05, "loss": 0.2206, "step": 2448 }, { "epoch": 0.19549772491418535, "grad_norm": 0.42147594179346637, "learning_rate": 1.8597757759545797e-05, "loss": 0.1867, "step": 2449 }, { "epoch": 0.1955775524866289, "grad_norm": 0.3715813379445685, "learning_rate": 1.859643715078105e-05, "loss": 0.2027, "step": 2450 }, { "epoch": 0.1956573800590724, "grad_norm": 0.3043768846808681, "learning_rate": 1.85951159673785e-05, "loss": 0.2248, "step": 2451 }, { "epoch": 0.19573720763151592, "grad_norm": 0.37533436579164736, "learning_rate": 1.859379420942646e-05, "loss": 0.1863, "step": 2452 }, { "epoch": 0.19581703520395946, "grad_norm": 0.2939810780954293, "learning_rate": 1.859247187701328e-05, "loss": 0.2456, "step": 2453 }, { "epoch": 0.19589686277640297, "grad_norm": 0.32335707353487003, "learning_rate": 1.8591148970227368e-05, "loss": 0.2798, "step": 2454 }, { "epoch": 0.1959766903488465, "grad_norm": 0.324138463881841, "learning_rate": 1.8589825489157144e-05, "loss": 0.2629, "step": 2455 }, { "epoch": 0.19605651792129, "grad_norm": 0.2840666422310402, "learning_rate": 1.8588501433891078e-05, "loss": 0.2127, "step": 2456 }, { "epoch": 0.19613634549373354, "grad_norm": 0.31285010253362305, "learning_rate": 1.858717680451768e-05, "loss": 0.2109, "step": 2457 }, { "epoch": 0.19621617306617706, "grad_norm": 0.29947539654833216, "learning_rate": 1.8585851601125497e-05, "loss": 0.1997, "step": 2458 }, { "epoch": 0.19629600063862057, "grad_norm": 0.30747934256475246, "learning_rate": 1.858452582380311e-05, "loss": 0.2501, "step": 2459 }, { "epoch": 0.1963758282110641, "grad_norm": 0.29824802339963846, "learning_rate": 1.8583199472639146e-05, "loss": 0.2286, "step": 2460 }, { "epoch": 0.19645565578350763, "grad_norm": 0.29638288253580003, "learning_rate": 1.8581872547722264e-05, "loss": 0.2377, "step": 2461 }, { "epoch": 0.19653548335595114, "grad_norm": 0.3041376394222456, "learning_rate": 1.8580545049141164e-05, "loss": 0.205, "step": 2462 }, { "epoch": 0.19661531092839465, "grad_norm": 0.3275057270621283, "learning_rate": 1.8579216976984583e-05, "loss": 0.1666, "step": 2463 }, { "epoch": 0.1966951385008382, "grad_norm": 0.33704000129774503, "learning_rate": 1.8577888331341302e-05, "loss": 0.185, "step": 2464 }, { "epoch": 0.1967749660732817, "grad_norm": 0.25894549203173783, "learning_rate": 1.8576559112300132e-05, "loss": 0.2429, "step": 2465 }, { "epoch": 0.19685479364572522, "grad_norm": 0.31527698012437816, "learning_rate": 1.8575229319949922e-05, "loss": 0.1882, "step": 2466 }, { "epoch": 0.19693462121816876, "grad_norm": 0.32231879467261365, "learning_rate": 1.8573898954379574e-05, "loss": 0.1892, "step": 2467 }, { "epoch": 0.19701444879061228, "grad_norm": 0.29566552767534127, "learning_rate": 1.857256801567801e-05, "loss": 0.2129, "step": 2468 }, { "epoch": 0.1970942763630558, "grad_norm": 0.26279650119414455, "learning_rate": 1.8571236503934193e-05, "loss": 0.184, "step": 2469 }, { "epoch": 0.19717410393549933, "grad_norm": 0.28809326892043813, "learning_rate": 1.8569904419237142e-05, "loss": 0.1901, "step": 2470 }, { "epoch": 0.19725393150794285, "grad_norm": 0.31661316247987037, "learning_rate": 1.8568571761675893e-05, "loss": 0.2076, "step": 2471 }, { "epoch": 0.19733375908038636, "grad_norm": 0.2833288954146462, "learning_rate": 1.8567238531339534e-05, "loss": 0.2115, "step": 2472 }, { "epoch": 0.19741358665282988, "grad_norm": 0.2743825895573193, "learning_rate": 1.8565904728317182e-05, "loss": 0.2058, "step": 2473 }, { "epoch": 0.19749341422527342, "grad_norm": 0.290030450315461, "learning_rate": 1.8564570352697993e-05, "loss": 0.2232, "step": 2474 }, { "epoch": 0.19757324179771693, "grad_norm": 0.2947700501295853, "learning_rate": 1.8563235404571177e-05, "loss": 0.1997, "step": 2475 }, { "epoch": 0.19765306937016044, "grad_norm": 0.2702708165813018, "learning_rate": 1.856189988402596e-05, "loss": 0.2295, "step": 2476 }, { "epoch": 0.197732896942604, "grad_norm": 0.2727995355676376, "learning_rate": 1.8560563791151614e-05, "loss": 0.1969, "step": 2477 }, { "epoch": 0.1978127245150475, "grad_norm": 0.3898736684075964, "learning_rate": 1.8559227126037463e-05, "loss": 0.2527, "step": 2478 }, { "epoch": 0.197892552087491, "grad_norm": 0.2789686510341973, "learning_rate": 1.8557889888772848e-05, "loss": 0.1955, "step": 2479 }, { "epoch": 0.19797237965993453, "grad_norm": 0.2807462334445915, "learning_rate": 1.8556552079447164e-05, "loss": 0.2054, "step": 2480 }, { "epoch": 0.19805220723237807, "grad_norm": 0.284404488951288, "learning_rate": 1.855521369814983e-05, "loss": 0.2103, "step": 2481 }, { "epoch": 0.19813203480482158, "grad_norm": 0.28707481224176795, "learning_rate": 1.855387474497032e-05, "loss": 0.1901, "step": 2482 }, { "epoch": 0.1982118623772651, "grad_norm": 0.31842521306085875, "learning_rate": 1.8552535219998134e-05, "loss": 0.2181, "step": 2483 }, { "epoch": 0.19829168994970864, "grad_norm": 0.3156909550529479, "learning_rate": 1.8551195123322812e-05, "loss": 0.2147, "step": 2484 }, { "epoch": 0.19837151752215215, "grad_norm": 0.32433900257965087, "learning_rate": 1.8549854455033944e-05, "loss": 0.192, "step": 2485 }, { "epoch": 0.19845134509459567, "grad_norm": 0.33045524539109367, "learning_rate": 1.8548513215221137e-05, "loss": 0.2114, "step": 2486 }, { "epoch": 0.1985311726670392, "grad_norm": 0.3233603488886659, "learning_rate": 1.854717140397405e-05, "loss": 0.2081, "step": 2487 }, { "epoch": 0.19861100023948272, "grad_norm": 0.28367957324945964, "learning_rate": 1.854582902138238e-05, "loss": 0.2079, "step": 2488 }, { "epoch": 0.19869082781192623, "grad_norm": 0.31559883646060083, "learning_rate": 1.854448606753586e-05, "loss": 0.1867, "step": 2489 }, { "epoch": 0.19877065538436975, "grad_norm": 0.36891910179942716, "learning_rate": 1.854314254252426e-05, "loss": 0.1946, "step": 2490 }, { "epoch": 0.1988504829568133, "grad_norm": 0.340943039940284, "learning_rate": 1.8541798446437392e-05, "loss": 0.2259, "step": 2491 }, { "epoch": 0.1989303105292568, "grad_norm": 0.35892650668031945, "learning_rate": 1.85404537793651e-05, "loss": 0.1962, "step": 2492 }, { "epoch": 0.19901013810170032, "grad_norm": 0.3142248098960487, "learning_rate": 1.853910854139727e-05, "loss": 0.237, "step": 2493 }, { "epoch": 0.19908996567414386, "grad_norm": 0.2984243402344674, "learning_rate": 1.853776273262383e-05, "loss": 0.2421, "step": 2494 }, { "epoch": 0.19916979324658737, "grad_norm": 0.33194708312786786, "learning_rate": 1.8536416353134735e-05, "loss": 0.1745, "step": 2495 }, { "epoch": 0.1992496208190309, "grad_norm": 0.27481548673953954, "learning_rate": 1.853506940301999e-05, "loss": 0.2126, "step": 2496 }, { "epoch": 0.19932944839147443, "grad_norm": 0.3364880927902004, "learning_rate": 1.8533721882369633e-05, "loss": 0.1933, "step": 2497 }, { "epoch": 0.19940927596391794, "grad_norm": 0.3480454606379325, "learning_rate": 1.8532373791273743e-05, "loss": 0.2162, "step": 2498 }, { "epoch": 0.19948910353636146, "grad_norm": 0.38437030880942463, "learning_rate": 1.8531025129822423e-05, "loss": 0.2116, "step": 2499 }, { "epoch": 0.19956893110880497, "grad_norm": 0.28636547517308564, "learning_rate": 1.852967589810584e-05, "loss": 0.2418, "step": 2500 }, { "epoch": 0.1996487586812485, "grad_norm": 0.2943428607202013, "learning_rate": 1.852832609621418e-05, "loss": 0.1661, "step": 2501 }, { "epoch": 0.19972858625369203, "grad_norm": 0.31672536618402636, "learning_rate": 1.852697572423767e-05, "loss": 0.2079, "step": 2502 }, { "epoch": 0.19980841382613554, "grad_norm": 0.2762994355233367, "learning_rate": 1.8525624782266572e-05, "loss": 0.1801, "step": 2503 }, { "epoch": 0.19988824139857908, "grad_norm": 0.3180098106796456, "learning_rate": 1.85242732703912e-05, "loss": 0.2126, "step": 2504 }, { "epoch": 0.1999680689710226, "grad_norm": 0.2796542938570218, "learning_rate": 1.8522921188701898e-05, "loss": 0.2012, "step": 2505 }, { "epoch": 0.2000478965434661, "grad_norm": 0.42087709610362656, "learning_rate": 1.8521568537289036e-05, "loss": 0.175, "step": 2506 }, { "epoch": 0.20012772411590962, "grad_norm": 0.3080299206874086, "learning_rate": 1.8520215316243044e-05, "loss": 0.2324, "step": 2507 }, { "epoch": 0.20020755168835316, "grad_norm": 0.3465188480430205, "learning_rate": 1.851886152565438e-05, "loss": 0.2262, "step": 2508 }, { "epoch": 0.20028737926079668, "grad_norm": 0.316703409123433, "learning_rate": 1.851750716561353e-05, "loss": 0.2263, "step": 2509 }, { "epoch": 0.2003672068332402, "grad_norm": 0.2995365386426011, "learning_rate": 1.8516152236211037e-05, "loss": 0.2349, "step": 2510 }, { "epoch": 0.20044703440568373, "grad_norm": 0.3185487867230703, "learning_rate": 1.851479673753747e-05, "loss": 0.2148, "step": 2511 }, { "epoch": 0.20052686197812725, "grad_norm": 0.3440280877996833, "learning_rate": 1.8513440669683436e-05, "loss": 0.2376, "step": 2512 }, { "epoch": 0.20060668955057076, "grad_norm": 0.3199642408952908, "learning_rate": 1.8512084032739583e-05, "loss": 0.2539, "step": 2513 }, { "epoch": 0.2006865171230143, "grad_norm": 0.35771284499661976, "learning_rate": 1.8510726826796598e-05, "loss": 0.1891, "step": 2514 }, { "epoch": 0.20076634469545782, "grad_norm": 0.39454401432521385, "learning_rate": 1.8509369051945208e-05, "loss": 0.2284, "step": 2515 }, { "epoch": 0.20084617226790133, "grad_norm": 0.413063627933874, "learning_rate": 1.850801070827617e-05, "loss": 0.1972, "step": 2516 }, { "epoch": 0.20092599984034484, "grad_norm": 0.3451951085967265, "learning_rate": 1.8506651795880288e-05, "loss": 0.1904, "step": 2517 }, { "epoch": 0.20100582741278838, "grad_norm": 0.3058576820696348, "learning_rate": 1.8505292314848396e-05, "loss": 0.2185, "step": 2518 }, { "epoch": 0.2010856549852319, "grad_norm": 0.3031849801894839, "learning_rate": 1.850393226527137e-05, "loss": 0.2307, "step": 2519 }, { "epoch": 0.2011654825576754, "grad_norm": 0.2733736446011561, "learning_rate": 1.850257164724013e-05, "loss": 0.1689, "step": 2520 }, { "epoch": 0.20124531013011895, "grad_norm": 0.361344381968389, "learning_rate": 1.850121046084562e-05, "loss": 0.2482, "step": 2521 }, { "epoch": 0.20132513770256247, "grad_norm": 0.3972579281081937, "learning_rate": 1.849984870617884e-05, "loss": 0.2353, "step": 2522 }, { "epoch": 0.20140496527500598, "grad_norm": 0.3337711471765524, "learning_rate": 1.8498486383330806e-05, "loss": 0.1891, "step": 2523 }, { "epoch": 0.2014847928474495, "grad_norm": 0.2894855403791314, "learning_rate": 1.8497123492392587e-05, "loss": 0.2213, "step": 2524 }, { "epoch": 0.20156462041989304, "grad_norm": 0.4119133064944763, "learning_rate": 1.8495760033455295e-05, "loss": 0.1887, "step": 2525 }, { "epoch": 0.20164444799233655, "grad_norm": 0.3644801831676207, "learning_rate": 1.849439600661006e-05, "loss": 0.2234, "step": 2526 }, { "epoch": 0.20172427556478006, "grad_norm": 0.3760529191012434, "learning_rate": 1.8493031411948076e-05, "loss": 0.2141, "step": 2527 }, { "epoch": 0.2018041031372236, "grad_norm": 0.3224559417347403, "learning_rate": 1.8491666249560546e-05, "loss": 0.2173, "step": 2528 }, { "epoch": 0.20188393070966712, "grad_norm": 0.2982595153857075, "learning_rate": 1.8490300519538736e-05, "loss": 0.212, "step": 2529 }, { "epoch": 0.20196375828211063, "grad_norm": 0.37880815499914566, "learning_rate": 1.8488934221973933e-05, "loss": 0.199, "step": 2530 }, { "epoch": 0.20204358585455418, "grad_norm": 0.2912777514270363, "learning_rate": 1.8487567356957474e-05, "loss": 0.2261, "step": 2531 }, { "epoch": 0.2021234134269977, "grad_norm": 0.30749208046257126, "learning_rate": 1.8486199924580724e-05, "loss": 0.1808, "step": 2532 }, { "epoch": 0.2022032409994412, "grad_norm": 0.3013953256683479, "learning_rate": 1.8484831924935094e-05, "loss": 0.2215, "step": 2533 }, { "epoch": 0.20228306857188472, "grad_norm": 0.3604607714460696, "learning_rate": 1.8483463358112027e-05, "loss": 0.1871, "step": 2534 }, { "epoch": 0.20236289614432826, "grad_norm": 0.3518051195238088, "learning_rate": 1.8482094224203008e-05, "loss": 0.1957, "step": 2535 }, { "epoch": 0.20244272371677177, "grad_norm": 0.32729904218435146, "learning_rate": 1.8480724523299557e-05, "loss": 0.2443, "step": 2536 }, { "epoch": 0.20252255128921529, "grad_norm": 0.33918948670629984, "learning_rate": 1.847935425549323e-05, "loss": 0.2444, "step": 2537 }, { "epoch": 0.20260237886165883, "grad_norm": 0.3264439985933819, "learning_rate": 1.8477983420875633e-05, "loss": 0.2067, "step": 2538 }, { "epoch": 0.20268220643410234, "grad_norm": 0.31494995858666885, "learning_rate": 1.8476612019538395e-05, "loss": 0.2343, "step": 2539 }, { "epoch": 0.20276203400654585, "grad_norm": 0.28039650060282867, "learning_rate": 1.847524005157319e-05, "loss": 0.2302, "step": 2540 }, { "epoch": 0.2028418615789894, "grad_norm": 0.3478222496062365, "learning_rate": 1.847386751707172e-05, "loss": 0.1879, "step": 2541 }, { "epoch": 0.2029216891514329, "grad_norm": 0.29946946402553376, "learning_rate": 1.8472494416125745e-05, "loss": 0.1986, "step": 2542 }, { "epoch": 0.20300151672387642, "grad_norm": 0.2665442978126554, "learning_rate": 1.847112074882705e-05, "loss": 0.2098, "step": 2543 }, { "epoch": 0.20308134429631994, "grad_norm": 0.29179586910290395, "learning_rate": 1.8469746515267455e-05, "loss": 0.2246, "step": 2544 }, { "epoch": 0.20316117186876348, "grad_norm": 0.31359621887553374, "learning_rate": 1.8468371715538822e-05, "loss": 0.2077, "step": 2545 }, { "epoch": 0.203240999441207, "grad_norm": 0.24011229776458437, "learning_rate": 1.8466996349733057e-05, "loss": 0.2771, "step": 2546 }, { "epoch": 0.2033208270136505, "grad_norm": 0.32108959152999306, "learning_rate": 1.846562041794209e-05, "loss": 0.206, "step": 2547 }, { "epoch": 0.20340065458609405, "grad_norm": 0.2822450956754214, "learning_rate": 1.84642439202579e-05, "loss": 0.2063, "step": 2548 }, { "epoch": 0.20348048215853756, "grad_norm": 0.28158794813159366, "learning_rate": 1.8462866856772504e-05, "loss": 0.246, "step": 2549 }, { "epoch": 0.20356030973098108, "grad_norm": 0.3298253889006684, "learning_rate": 1.8461489227577945e-05, "loss": 0.1925, "step": 2550 }, { "epoch": 0.2036401373034246, "grad_norm": 0.2914735752853546, "learning_rate": 1.846011103276632e-05, "loss": 0.1867, "step": 2551 }, { "epoch": 0.20371996487586813, "grad_norm": 0.30154697975493266, "learning_rate": 1.8458732272429754e-05, "loss": 0.1888, "step": 2552 }, { "epoch": 0.20379979244831165, "grad_norm": 0.27259678320990177, "learning_rate": 1.8457352946660406e-05, "loss": 0.1804, "step": 2553 }, { "epoch": 0.20387962002075516, "grad_norm": 0.34981663256729695, "learning_rate": 1.8455973055550488e-05, "loss": 0.2094, "step": 2554 }, { "epoch": 0.2039594475931987, "grad_norm": 0.32058120733322165, "learning_rate": 1.845459259919223e-05, "loss": 0.246, "step": 2555 }, { "epoch": 0.20403927516564221, "grad_norm": 0.3022956647337536, "learning_rate": 1.8453211577677915e-05, "loss": 0.2101, "step": 2556 }, { "epoch": 0.20411910273808573, "grad_norm": 0.3097346707809498, "learning_rate": 1.8451829991099863e-05, "loss": 0.2428, "step": 2557 }, { "epoch": 0.20419893031052927, "grad_norm": 0.30817820927387857, "learning_rate": 1.8450447839550425e-05, "loss": 0.2093, "step": 2558 }, { "epoch": 0.20427875788297278, "grad_norm": 0.3385097849229948, "learning_rate": 1.8449065123121985e-05, "loss": 0.1875, "step": 2559 }, { "epoch": 0.2043585854554163, "grad_norm": 0.28640456054940644, "learning_rate": 1.8447681841906983e-05, "loss": 0.203, "step": 2560 }, { "epoch": 0.2044384130278598, "grad_norm": 0.3023166731583091, "learning_rate": 1.844629799599788e-05, "loss": 0.1958, "step": 2561 }, { "epoch": 0.20451824060030335, "grad_norm": 0.2993232875415932, "learning_rate": 1.844491358548718e-05, "loss": 0.2002, "step": 2562 }, { "epoch": 0.20459806817274687, "grad_norm": 0.2715106314062468, "learning_rate": 1.8443528610467427e-05, "loss": 0.2025, "step": 2563 }, { "epoch": 0.20467789574519038, "grad_norm": 0.275479353468039, "learning_rate": 1.84421430710312e-05, "loss": 0.2218, "step": 2564 }, { "epoch": 0.20475772331763392, "grad_norm": 0.32738071280123926, "learning_rate": 1.8440756967271118e-05, "loss": 0.2001, "step": 2565 }, { "epoch": 0.20483755089007744, "grad_norm": 0.3868164553154167, "learning_rate": 1.8439370299279837e-05, "loss": 0.1591, "step": 2566 }, { "epoch": 0.20491737846252095, "grad_norm": 0.33465414237853186, "learning_rate": 1.843798306715005e-05, "loss": 0.1705, "step": 2567 }, { "epoch": 0.20499720603496446, "grad_norm": 0.3268915115059132, "learning_rate": 1.843659527097449e-05, "loss": 0.2254, "step": 2568 }, { "epoch": 0.205077033607408, "grad_norm": 0.2952805321570803, "learning_rate": 1.8435206910845918e-05, "loss": 0.1774, "step": 2569 }, { "epoch": 0.20515686117985152, "grad_norm": 0.25504598169741627, "learning_rate": 1.843381798685715e-05, "loss": 0.1872, "step": 2570 }, { "epoch": 0.20523668875229503, "grad_norm": 0.3023480463541593, "learning_rate": 1.8432428499101025e-05, "loss": 0.184, "step": 2571 }, { "epoch": 0.20531651632473857, "grad_norm": 0.29347701576794216, "learning_rate": 1.8431038447670425e-05, "loss": 0.2166, "step": 2572 }, { "epoch": 0.2053963438971821, "grad_norm": 0.33047939051280006, "learning_rate": 1.8429647832658272e-05, "loss": 0.1704, "step": 2573 }, { "epoch": 0.2054761714696256, "grad_norm": 0.3526509389335925, "learning_rate": 1.842825665415752e-05, "loss": 0.1917, "step": 2574 }, { "epoch": 0.20555599904206914, "grad_norm": 0.30674096854573635, "learning_rate": 1.842686491226116e-05, "loss": 0.2507, "step": 2575 }, { "epoch": 0.20563582661451266, "grad_norm": 0.38392201386175034, "learning_rate": 1.8425472607062233e-05, "loss": 0.2234, "step": 2576 }, { "epoch": 0.20571565418695617, "grad_norm": 0.3580081046388052, "learning_rate": 1.8424079738653806e-05, "loss": 0.1956, "step": 2577 }, { "epoch": 0.20579548175939968, "grad_norm": 0.3270538590404085, "learning_rate": 1.8422686307128987e-05, "loss": 0.2256, "step": 2578 }, { "epoch": 0.20587530933184323, "grad_norm": 0.3740133145077873, "learning_rate": 1.8421292312580918e-05, "loss": 0.172, "step": 2579 }, { "epoch": 0.20595513690428674, "grad_norm": 0.3538327442910663, "learning_rate": 1.8419897755102786e-05, "loss": 0.1887, "step": 2580 }, { "epoch": 0.20603496447673025, "grad_norm": 0.27987346795295365, "learning_rate": 1.841850263478781e-05, "loss": 0.157, "step": 2581 }, { "epoch": 0.2061147920491738, "grad_norm": 0.3273084147576663, "learning_rate": 1.8417106951729245e-05, "loss": 0.227, "step": 2582 }, { "epoch": 0.2061946196216173, "grad_norm": 0.4221595501055587, "learning_rate": 1.8415710706020394e-05, "loss": 0.2075, "step": 2583 }, { "epoch": 0.20627444719406082, "grad_norm": 0.32217478638829133, "learning_rate": 1.8414313897754588e-05, "loss": 0.1941, "step": 2584 }, { "epoch": 0.20635427476650436, "grad_norm": 0.324661468391164, "learning_rate": 1.84129165270252e-05, "loss": 0.1807, "step": 2585 }, { "epoch": 0.20643410233894788, "grad_norm": 0.3112477339922917, "learning_rate": 1.8411518593925626e-05, "loss": 0.1736, "step": 2586 }, { "epoch": 0.2065139299113914, "grad_norm": 0.35485703192419626, "learning_rate": 1.8410120098549325e-05, "loss": 0.1974, "step": 2587 }, { "epoch": 0.2065937574838349, "grad_norm": 0.27150879963627084, "learning_rate": 1.840872104098978e-05, "loss": 0.1893, "step": 2588 }, { "epoch": 0.20667358505627845, "grad_norm": 0.34848930809817297, "learning_rate": 1.8407321421340514e-05, "loss": 0.1847, "step": 2589 }, { "epoch": 0.20675341262872196, "grad_norm": 0.35896490852853374, "learning_rate": 1.8405921239695078e-05, "loss": 0.165, "step": 2590 }, { "epoch": 0.20683324020116547, "grad_norm": 0.3095679879569457, "learning_rate": 1.840452049614707e-05, "loss": 0.1899, "step": 2591 }, { "epoch": 0.20691306777360902, "grad_norm": 0.2945051956661433, "learning_rate": 1.840311919079013e-05, "loss": 0.1745, "step": 2592 }, { "epoch": 0.20699289534605253, "grad_norm": 0.2972796494871313, "learning_rate": 1.840171732371793e-05, "loss": 0.2195, "step": 2593 }, { "epoch": 0.20707272291849604, "grad_norm": 0.2840385823606774, "learning_rate": 1.8400314895024173e-05, "loss": 0.171, "step": 2594 }, { "epoch": 0.20715255049093956, "grad_norm": 0.3847625235370754, "learning_rate": 1.8398911904802608e-05, "loss": 0.1987, "step": 2595 }, { "epoch": 0.2072323780633831, "grad_norm": 0.3991494996657016, "learning_rate": 1.839750835314702e-05, "loss": 0.1864, "step": 2596 }, { "epoch": 0.2073122056358266, "grad_norm": 0.28092472782120076, "learning_rate": 1.8396104240151234e-05, "loss": 0.228, "step": 2597 }, { "epoch": 0.20739203320827013, "grad_norm": 0.338991818693649, "learning_rate": 1.8394699565909105e-05, "loss": 0.2649, "step": 2598 }, { "epoch": 0.20747186078071367, "grad_norm": 0.43089859136549, "learning_rate": 1.839329433051453e-05, "loss": 0.1592, "step": 2599 }, { "epoch": 0.20755168835315718, "grad_norm": 0.37011448889194654, "learning_rate": 1.8391888534061446e-05, "loss": 0.2211, "step": 2600 }, { "epoch": 0.2076315159256007, "grad_norm": 0.40014923140238573, "learning_rate": 1.8390482176643822e-05, "loss": 0.1992, "step": 2601 }, { "epoch": 0.20771134349804424, "grad_norm": 0.4931511493077454, "learning_rate": 1.8389075258355672e-05, "loss": 0.2284, "step": 2602 }, { "epoch": 0.20779117107048775, "grad_norm": 0.43594967941582446, "learning_rate": 1.838766777929104e-05, "loss": 0.2187, "step": 2603 }, { "epoch": 0.20787099864293127, "grad_norm": 0.3696357514981833, "learning_rate": 1.8386259739544007e-05, "loss": 0.2541, "step": 2604 }, { "epoch": 0.20795082621537478, "grad_norm": 0.6036885887438891, "learning_rate": 1.8384851139208703e-05, "loss": 0.2261, "step": 2605 }, { "epoch": 0.20803065378781832, "grad_norm": 0.5149941907764035, "learning_rate": 1.838344197837928e-05, "loss": 0.2259, "step": 2606 }, { "epoch": 0.20811048136026183, "grad_norm": 0.31094316531093336, "learning_rate": 1.838203225714994e-05, "loss": 0.1831, "step": 2607 }, { "epoch": 0.20819030893270535, "grad_norm": 0.33833286599431317, "learning_rate": 1.8380621975614912e-05, "loss": 0.196, "step": 2608 }, { "epoch": 0.2082701365051489, "grad_norm": 0.5657360664117048, "learning_rate": 1.837921113386847e-05, "loss": 0.2385, "step": 2609 }, { "epoch": 0.2083499640775924, "grad_norm": 0.373348740916078, "learning_rate": 1.8377799732004927e-05, "loss": 0.2078, "step": 2610 }, { "epoch": 0.20842979165003592, "grad_norm": 0.32053745193546107, "learning_rate": 1.8376387770118622e-05, "loss": 0.1914, "step": 2611 }, { "epoch": 0.20850961922247943, "grad_norm": 0.3565506571155515, "learning_rate": 1.837497524830395e-05, "loss": 0.2297, "step": 2612 }, { "epoch": 0.20858944679492297, "grad_norm": 0.43841157618385396, "learning_rate": 1.837356216665532e-05, "loss": 0.2082, "step": 2613 }, { "epoch": 0.2086692743673665, "grad_norm": 0.30748537348556226, "learning_rate": 1.83721485252672e-05, "loss": 0.2396, "step": 2614 }, { "epoch": 0.20874910193981, "grad_norm": 0.28830699600212134, "learning_rate": 1.837073432423408e-05, "loss": 0.2335, "step": 2615 }, { "epoch": 0.20882892951225354, "grad_norm": 0.3904796246100341, "learning_rate": 1.8369319563650498e-05, "loss": 0.2166, "step": 2616 }, { "epoch": 0.20890875708469706, "grad_norm": 0.39977042625458004, "learning_rate": 1.8367904243611028e-05, "loss": 0.1669, "step": 2617 }, { "epoch": 0.20898858465714057, "grad_norm": 0.34838326416429727, "learning_rate": 1.836648836421027e-05, "loss": 0.1779, "step": 2618 }, { "epoch": 0.2090684122295841, "grad_norm": 0.3443081418238201, "learning_rate": 1.8365071925542878e-05, "loss": 0.192, "step": 2619 }, { "epoch": 0.20914823980202762, "grad_norm": 0.3760508982367623, "learning_rate": 1.836365492770353e-05, "loss": 0.1888, "step": 2620 }, { "epoch": 0.20922806737447114, "grad_norm": 0.2878648244215399, "learning_rate": 1.8362237370786948e-05, "loss": 0.213, "step": 2621 }, { "epoch": 0.20930789494691465, "grad_norm": 0.36506802509708614, "learning_rate": 1.8360819254887895e-05, "loss": 0.1899, "step": 2622 }, { "epoch": 0.2093877225193582, "grad_norm": 0.35556583451174584, "learning_rate": 1.8359400580101155e-05, "loss": 0.2218, "step": 2623 }, { "epoch": 0.2094675500918017, "grad_norm": 0.38441765583764703, "learning_rate": 1.8357981346521573e-05, "loss": 0.1969, "step": 2624 }, { "epoch": 0.20954737766424522, "grad_norm": 0.28700055690287807, "learning_rate": 1.835656155424401e-05, "loss": 0.1951, "step": 2625 }, { "epoch": 0.20962720523668876, "grad_norm": 0.3069124680594819, "learning_rate": 1.8355141203363383e-05, "loss": 0.2818, "step": 2626 }, { "epoch": 0.20970703280913228, "grad_norm": 0.41521467126397066, "learning_rate": 1.8353720293974627e-05, "loss": 0.2347, "step": 2627 }, { "epoch": 0.2097868603815758, "grad_norm": 0.28359238308493717, "learning_rate": 1.8352298826172733e-05, "loss": 0.1817, "step": 2628 }, { "epoch": 0.20986668795401933, "grad_norm": 0.3704917772670614, "learning_rate": 1.8350876800052714e-05, "loss": 0.2068, "step": 2629 }, { "epoch": 0.20994651552646285, "grad_norm": 0.3606867347651136, "learning_rate": 1.8349454215709627e-05, "loss": 0.1802, "step": 2630 }, { "epoch": 0.21002634309890636, "grad_norm": 0.3092999916248512, "learning_rate": 1.8348031073238568e-05, "loss": 0.2064, "step": 2631 }, { "epoch": 0.21010617067134987, "grad_norm": 0.35646041741044254, "learning_rate": 1.8346607372734667e-05, "loss": 0.2293, "step": 2632 }, { "epoch": 0.21018599824379341, "grad_norm": 0.3697474028341002, "learning_rate": 1.83451831142931e-05, "loss": 0.2709, "step": 2633 }, { "epoch": 0.21026582581623693, "grad_norm": 0.36214088334558503, "learning_rate": 1.8343758298009065e-05, "loss": 0.2003, "step": 2634 }, { "epoch": 0.21034565338868044, "grad_norm": 0.30501983403878413, "learning_rate": 1.8342332923977806e-05, "loss": 0.183, "step": 2635 }, { "epoch": 0.21042548096112398, "grad_norm": 0.30643193806778646, "learning_rate": 1.8340906992294604e-05, "loss": 0.2027, "step": 2636 }, { "epoch": 0.2105053085335675, "grad_norm": 0.3648425791398821, "learning_rate": 1.833948050305478e-05, "loss": 0.2017, "step": 2637 }, { "epoch": 0.210585136106011, "grad_norm": 0.38955797272963955, "learning_rate": 1.833805345635369e-05, "loss": 0.1717, "step": 2638 }, { "epoch": 0.21066496367845453, "grad_norm": 0.3421724621249618, "learning_rate": 1.8336625852286718e-05, "loss": 0.1877, "step": 2639 }, { "epoch": 0.21074479125089807, "grad_norm": 0.3684134769040281, "learning_rate": 1.83351976909493e-05, "loss": 0.202, "step": 2640 }, { "epoch": 0.21082461882334158, "grad_norm": 0.38778350277873147, "learning_rate": 1.8333768972436907e-05, "loss": 0.2187, "step": 2641 }, { "epoch": 0.2109044463957851, "grad_norm": 0.2651008460454502, "learning_rate": 1.8332339696845035e-05, "loss": 0.2337, "step": 2642 }, { "epoch": 0.21098427396822864, "grad_norm": 0.31315088768410027, "learning_rate": 1.833090986426923e-05, "loss": 0.1751, "step": 2643 }, { "epoch": 0.21106410154067215, "grad_norm": 0.30953351287426906, "learning_rate": 1.8329479474805068e-05, "loss": 0.2566, "step": 2644 }, { "epoch": 0.21114392911311566, "grad_norm": 0.31124174644092434, "learning_rate": 1.8328048528548165e-05, "loss": 0.1961, "step": 2645 }, { "epoch": 0.2112237566855592, "grad_norm": 0.3370161174776873, "learning_rate": 1.832661702559418e-05, "loss": 0.2035, "step": 2646 }, { "epoch": 0.21130358425800272, "grad_norm": 0.3020915035920843, "learning_rate": 1.8325184966038794e-05, "loss": 0.2148, "step": 2647 }, { "epoch": 0.21138341183044623, "grad_norm": 0.324160727552968, "learning_rate": 1.8323752349977745e-05, "loss": 0.2038, "step": 2648 }, { "epoch": 0.21146323940288975, "grad_norm": 0.3426096885108062, "learning_rate": 1.832231917750679e-05, "loss": 0.1852, "step": 2649 }, { "epoch": 0.2115430669753333, "grad_norm": 0.3047990203557507, "learning_rate": 1.832088544872173e-05, "loss": 0.2282, "step": 2650 }, { "epoch": 0.2116228945477768, "grad_norm": 0.3601174178514226, "learning_rate": 1.831945116371841e-05, "loss": 0.1845, "step": 2651 }, { "epoch": 0.21170272212022032, "grad_norm": 0.26991051340953437, "learning_rate": 1.83180163225927e-05, "loss": 0.197, "step": 2652 }, { "epoch": 0.21178254969266386, "grad_norm": 0.32487624326580156, "learning_rate": 1.831658092544052e-05, "loss": 0.2029, "step": 2653 }, { "epoch": 0.21186237726510737, "grad_norm": 0.39402549339159204, "learning_rate": 1.8315144972357815e-05, "loss": 0.203, "step": 2654 }, { "epoch": 0.21194220483755088, "grad_norm": 0.355938921219125, "learning_rate": 1.8313708463440574e-05, "loss": 0.2326, "step": 2655 }, { "epoch": 0.2120220324099944, "grad_norm": 0.2853282557493271, "learning_rate": 1.8312271398784827e-05, "loss": 0.2362, "step": 2656 }, { "epoch": 0.21210185998243794, "grad_norm": 0.351765730824537, "learning_rate": 1.831083377848663e-05, "loss": 0.1925, "step": 2657 }, { "epoch": 0.21218168755488145, "grad_norm": 0.3168571340799997, "learning_rate": 1.830939560264209e-05, "loss": 0.1953, "step": 2658 }, { "epoch": 0.21226151512732497, "grad_norm": 0.2863447513206712, "learning_rate": 1.830795687134733e-05, "loss": 0.1929, "step": 2659 }, { "epoch": 0.2123413426997685, "grad_norm": 0.32496545081760275, "learning_rate": 1.8306517584698534e-05, "loss": 0.1962, "step": 2660 }, { "epoch": 0.21242117027221202, "grad_norm": 0.38541461495257046, "learning_rate": 1.830507774279191e-05, "loss": 0.2099, "step": 2661 }, { "epoch": 0.21250099784465554, "grad_norm": 0.35459318654315625, "learning_rate": 1.8303637345723704e-05, "loss": 0.2078, "step": 2662 }, { "epoch": 0.21258082541709908, "grad_norm": 0.2807932471235869, "learning_rate": 1.8302196393590205e-05, "loss": 0.2008, "step": 2663 }, { "epoch": 0.2126606529895426, "grad_norm": 0.317287769394422, "learning_rate": 1.8300754886487732e-05, "loss": 0.1865, "step": 2664 }, { "epoch": 0.2127404805619861, "grad_norm": 0.3107700257764306, "learning_rate": 1.829931282451264e-05, "loss": 0.2136, "step": 2665 }, { "epoch": 0.21282030813442962, "grad_norm": 0.286698932777249, "learning_rate": 1.8297870207761333e-05, "loss": 0.2026, "step": 2666 }, { "epoch": 0.21290013570687316, "grad_norm": 0.37991283997999187, "learning_rate": 1.8296427036330243e-05, "loss": 0.2265, "step": 2667 }, { "epoch": 0.21297996327931668, "grad_norm": 0.3036511880519492, "learning_rate": 1.8294983310315837e-05, "loss": 0.2147, "step": 2668 }, { "epoch": 0.2130597908517602, "grad_norm": 0.292560773097325, "learning_rate": 1.829353902981462e-05, "loss": 0.1979, "step": 2669 }, { "epoch": 0.21313961842420373, "grad_norm": 0.3920855470071817, "learning_rate": 1.8292094194923146e-05, "loss": 0.2144, "step": 2670 }, { "epoch": 0.21321944599664724, "grad_norm": 0.32125877708036765, "learning_rate": 1.8290648805737986e-05, "loss": 0.2059, "step": 2671 }, { "epoch": 0.21329927356909076, "grad_norm": 0.3132248045018804, "learning_rate": 1.8289202862355763e-05, "loss": 0.2065, "step": 2672 }, { "epoch": 0.2133791011415343, "grad_norm": 0.33102943440187005, "learning_rate": 1.828775636487313e-05, "loss": 0.2306, "step": 2673 }, { "epoch": 0.2134589287139778, "grad_norm": 0.3493822640944255, "learning_rate": 1.8286309313386787e-05, "loss": 0.2204, "step": 2674 }, { "epoch": 0.21353875628642133, "grad_norm": 0.29921971604555087, "learning_rate": 1.8284861707993456e-05, "loss": 0.2123, "step": 2675 }, { "epoch": 0.21361858385886484, "grad_norm": 0.3632142178753503, "learning_rate": 1.828341354878991e-05, "loss": 0.2257, "step": 2676 }, { "epoch": 0.21369841143130838, "grad_norm": 0.30438938740964266, "learning_rate": 1.8281964835872946e-05, "loss": 0.163, "step": 2677 }, { "epoch": 0.2137782390037519, "grad_norm": 0.3236241516302812, "learning_rate": 1.828051556933941e-05, "loss": 0.2558, "step": 2678 }, { "epoch": 0.2138580665761954, "grad_norm": 0.3318147881476722, "learning_rate": 1.827906574928618e-05, "loss": 0.1766, "step": 2679 }, { "epoch": 0.21393789414863895, "grad_norm": 0.2896031493108623, "learning_rate": 1.827761537581016e-05, "loss": 0.186, "step": 2680 }, { "epoch": 0.21401772172108247, "grad_norm": 0.31460000907504526, "learning_rate": 1.8276164449008318e-05, "loss": 0.2307, "step": 2681 }, { "epoch": 0.21409754929352598, "grad_norm": 0.36429262890057623, "learning_rate": 1.827471296897763e-05, "loss": 0.2054, "step": 2682 }, { "epoch": 0.2141773768659695, "grad_norm": 0.2947417643213497, "learning_rate": 1.827326093581513e-05, "loss": 0.1811, "step": 2683 }, { "epoch": 0.21425720443841303, "grad_norm": 0.39923318799355995, "learning_rate": 1.8271808349617876e-05, "loss": 0.243, "step": 2684 }, { "epoch": 0.21433703201085655, "grad_norm": 0.28745007970482406, "learning_rate": 1.827035521048297e-05, "loss": 0.2302, "step": 2685 }, { "epoch": 0.21441685958330006, "grad_norm": 0.32084527198716306, "learning_rate": 1.8268901518507546e-05, "loss": 0.1989, "step": 2686 }, { "epoch": 0.2144966871557436, "grad_norm": 0.4500669188816551, "learning_rate": 1.826744727378878e-05, "loss": 0.1916, "step": 2687 }, { "epoch": 0.21457651472818712, "grad_norm": 0.35388285664229946, "learning_rate": 1.826599247642388e-05, "loss": 0.2005, "step": 2688 }, { "epoch": 0.21465634230063063, "grad_norm": 0.41358307797109645, "learning_rate": 1.8264537126510092e-05, "loss": 0.1984, "step": 2689 }, { "epoch": 0.21473616987307417, "grad_norm": 0.30598446528326223, "learning_rate": 1.8263081224144707e-05, "loss": 0.1701, "step": 2690 }, { "epoch": 0.2148159974455177, "grad_norm": 0.3702214041110097, "learning_rate": 1.8261624769425045e-05, "loss": 0.2305, "step": 2691 }, { "epoch": 0.2148958250179612, "grad_norm": 0.33644786940340476, "learning_rate": 1.826016776244846e-05, "loss": 0.2234, "step": 2692 }, { "epoch": 0.21497565259040471, "grad_norm": 0.2770070179545166, "learning_rate": 1.825871020331235e-05, "loss": 0.2145, "step": 2693 }, { "epoch": 0.21505548016284826, "grad_norm": 0.3419330241847072, "learning_rate": 1.8257252092114143e-05, "loss": 0.1992, "step": 2694 }, { "epoch": 0.21513530773529177, "grad_norm": 0.4092611926524747, "learning_rate": 1.8255793428951315e-05, "loss": 0.2244, "step": 2695 }, { "epoch": 0.21521513530773528, "grad_norm": 0.3380672572905165, "learning_rate": 1.8254334213921365e-05, "loss": 0.203, "step": 2696 }, { "epoch": 0.21529496288017883, "grad_norm": 0.3657690452416782, "learning_rate": 1.8252874447121837e-05, "loss": 0.2239, "step": 2697 }, { "epoch": 0.21537479045262234, "grad_norm": 0.3411971534904674, "learning_rate": 1.8251414128650316e-05, "loss": 0.2222, "step": 2698 }, { "epoch": 0.21545461802506585, "grad_norm": 0.3205005029495053, "learning_rate": 1.8249953258604415e-05, "loss": 0.2034, "step": 2699 }, { "epoch": 0.21553444559750937, "grad_norm": 0.2952452707690457, "learning_rate": 1.8248491837081786e-05, "loss": 0.1861, "step": 2700 }, { "epoch": 0.2156142731699529, "grad_norm": 0.37510704251715066, "learning_rate": 1.8247029864180115e-05, "loss": 0.1892, "step": 2701 }, { "epoch": 0.21569410074239642, "grad_norm": 0.3308023770355888, "learning_rate": 1.8245567339997145e-05, "loss": 0.1894, "step": 2702 }, { "epoch": 0.21577392831483994, "grad_norm": 0.31445433636022346, "learning_rate": 1.824410426463062e-05, "loss": 0.1679, "step": 2703 }, { "epoch": 0.21585375588728348, "grad_norm": 0.3197446401382678, "learning_rate": 1.8242640638178356e-05, "loss": 0.2197, "step": 2704 }, { "epoch": 0.215933583459727, "grad_norm": 0.2831488983852892, "learning_rate": 1.8241176460738183e-05, "loss": 0.2037, "step": 2705 }, { "epoch": 0.2160134110321705, "grad_norm": 0.3358995690637415, "learning_rate": 1.823971173240798e-05, "loss": 0.1544, "step": 2706 }, { "epoch": 0.21609323860461405, "grad_norm": 0.3170048291264033, "learning_rate": 1.8238246453285657e-05, "loss": 0.1885, "step": 2707 }, { "epoch": 0.21617306617705756, "grad_norm": 0.31605006796586366, "learning_rate": 1.8236780623469155e-05, "loss": 0.2553, "step": 2708 }, { "epoch": 0.21625289374950107, "grad_norm": 0.33438382586481297, "learning_rate": 1.8235314243056466e-05, "loss": 0.232, "step": 2709 }, { "epoch": 0.2163327213219446, "grad_norm": 0.37041021616513853, "learning_rate": 1.8233847312145614e-05, "loss": 0.2284, "step": 2710 }, { "epoch": 0.21641254889438813, "grad_norm": 0.28318179633168333, "learning_rate": 1.823237983083465e-05, "loss": 0.2098, "step": 2711 }, { "epoch": 0.21649237646683164, "grad_norm": 0.37795519853225, "learning_rate": 1.8230911799221677e-05, "loss": 0.1967, "step": 2712 }, { "epoch": 0.21657220403927516, "grad_norm": 0.3090928370491984, "learning_rate": 1.8229443217404822e-05, "loss": 0.2505, "step": 2713 }, { "epoch": 0.2166520316117187, "grad_norm": 0.37132116187924974, "learning_rate": 1.822797408548225e-05, "loss": 0.2213, "step": 2714 }, { "epoch": 0.2167318591841622, "grad_norm": 0.38094501845648115, "learning_rate": 1.822650440355218e-05, "loss": 0.1991, "step": 2715 }, { "epoch": 0.21681168675660573, "grad_norm": 0.30830924234915535, "learning_rate": 1.8225034171712842e-05, "loss": 0.203, "step": 2716 }, { "epoch": 0.21689151432904924, "grad_norm": 0.3568014794261275, "learning_rate": 1.8223563390062523e-05, "loss": 0.2142, "step": 2717 }, { "epoch": 0.21697134190149278, "grad_norm": 0.4338196017957077, "learning_rate": 1.8222092058699533e-05, "loss": 0.1939, "step": 2718 }, { "epoch": 0.2170511694739363, "grad_norm": 0.3418212266932969, "learning_rate": 1.8220620177722225e-05, "loss": 0.1984, "step": 2719 }, { "epoch": 0.2171309970463798, "grad_norm": 0.41640040692089425, "learning_rate": 1.8219147747228995e-05, "loss": 0.1788, "step": 2720 }, { "epoch": 0.21721082461882335, "grad_norm": 0.3987479451501515, "learning_rate": 1.8217674767318262e-05, "loss": 0.1869, "step": 2721 }, { "epoch": 0.21729065219126686, "grad_norm": 0.3140041171507757, "learning_rate": 1.8216201238088493e-05, "loss": 0.1761, "step": 2722 }, { "epoch": 0.21737047976371038, "grad_norm": 0.28797941885627965, "learning_rate": 1.8214727159638183e-05, "loss": 0.2314, "step": 2723 }, { "epoch": 0.21745030733615392, "grad_norm": 0.27986934127346097, "learning_rate": 1.8213252532065876e-05, "loss": 0.2121, "step": 2724 }, { "epoch": 0.21753013490859743, "grad_norm": 0.3208390002191292, "learning_rate": 1.8211777355470137e-05, "loss": 0.1967, "step": 2725 }, { "epoch": 0.21760996248104095, "grad_norm": 0.3528393852379345, "learning_rate": 1.821030162994958e-05, "loss": 0.239, "step": 2726 }, { "epoch": 0.21768979005348446, "grad_norm": 0.30556255658064474, "learning_rate": 1.8208825355602852e-05, "loss": 0.2422, "step": 2727 }, { "epoch": 0.217769617625928, "grad_norm": 0.27125294635820923, "learning_rate": 1.8207348532528634e-05, "loss": 0.2161, "step": 2728 }, { "epoch": 0.21784944519837152, "grad_norm": 0.29686785875997823, "learning_rate": 1.8205871160825646e-05, "loss": 0.2075, "step": 2729 }, { "epoch": 0.21792927277081503, "grad_norm": 0.4026445313214259, "learning_rate": 1.8204393240592647e-05, "loss": 0.2237, "step": 2730 }, { "epoch": 0.21800910034325857, "grad_norm": 0.400046035815169, "learning_rate": 1.820291477192843e-05, "loss": 0.1605, "step": 2731 }, { "epoch": 0.21808892791570209, "grad_norm": 0.3294711563286794, "learning_rate": 1.8201435754931817e-05, "loss": 0.2146, "step": 2732 }, { "epoch": 0.2181687554881456, "grad_norm": 0.3522089798891832, "learning_rate": 1.8199956189701683e-05, "loss": 0.1635, "step": 2733 }, { "epoch": 0.21824858306058914, "grad_norm": 0.3831470265998637, "learning_rate": 1.819847607633693e-05, "loss": 0.1778, "step": 2734 }, { "epoch": 0.21832841063303265, "grad_norm": 0.2823568711537277, "learning_rate": 1.8196995414936492e-05, "loss": 0.197, "step": 2735 }, { "epoch": 0.21840823820547617, "grad_norm": 0.3586928320469299, "learning_rate": 1.8195514205599352e-05, "loss": 0.2457, "step": 2736 }, { "epoch": 0.21848806577791968, "grad_norm": 0.40267561010504727, "learning_rate": 1.819403244842452e-05, "loss": 0.2638, "step": 2737 }, { "epoch": 0.21856789335036322, "grad_norm": 0.31701348443677546, "learning_rate": 1.819255014351105e-05, "loss": 0.2533, "step": 2738 }, { "epoch": 0.21864772092280674, "grad_norm": 0.36154608822530976, "learning_rate": 1.819106729095802e-05, "loss": 0.2133, "step": 2739 }, { "epoch": 0.21872754849525025, "grad_norm": 0.3102557882302213, "learning_rate": 1.818958389086456e-05, "loss": 0.2317, "step": 2740 }, { "epoch": 0.2188073760676938, "grad_norm": 0.35532117117738266, "learning_rate": 1.8188099943329823e-05, "loss": 0.2566, "step": 2741 }, { "epoch": 0.2188872036401373, "grad_norm": 0.3024619166866161, "learning_rate": 1.8186615448453012e-05, "loss": 0.1566, "step": 2742 }, { "epoch": 0.21896703121258082, "grad_norm": 0.45855757951285686, "learning_rate": 1.8185130406333355e-05, "loss": 0.2103, "step": 2743 }, { "epoch": 0.21904685878502433, "grad_norm": 0.33757400688640854, "learning_rate": 1.818364481707012e-05, "loss": 0.2049, "step": 2744 }, { "epoch": 0.21912668635746788, "grad_norm": 0.2809265445909669, "learning_rate": 1.8182158680762614e-05, "loss": 0.1901, "step": 2745 }, { "epoch": 0.2192065139299114, "grad_norm": 0.3812657770909674, "learning_rate": 1.8180671997510188e-05, "loss": 0.2031, "step": 2746 }, { "epoch": 0.2192863415023549, "grad_norm": 0.38725051040026714, "learning_rate": 1.8179184767412206e-05, "loss": 0.2345, "step": 2747 }, { "epoch": 0.21936616907479844, "grad_norm": 0.3257205468607974, "learning_rate": 1.8177696990568094e-05, "loss": 0.2235, "step": 2748 }, { "epoch": 0.21944599664724196, "grad_norm": 0.3102819158776292, "learning_rate": 1.81762086670773e-05, "loss": 0.2085, "step": 2749 }, { "epoch": 0.21952582421968547, "grad_norm": 0.3589822381124218, "learning_rate": 1.8174719797039314e-05, "loss": 0.2393, "step": 2750 }, { "epoch": 0.21960565179212901, "grad_norm": 0.31714408504607894, "learning_rate": 1.8173230380553658e-05, "loss": 0.2058, "step": 2751 }, { "epoch": 0.21968547936457253, "grad_norm": 0.2959719464652958, "learning_rate": 1.81717404177199e-05, "loss": 0.1802, "step": 2752 }, { "epoch": 0.21976530693701604, "grad_norm": 0.3438199352015245, "learning_rate": 1.817024990863763e-05, "loss": 0.2251, "step": 2753 }, { "epoch": 0.21984513450945956, "grad_norm": 0.3136657353773222, "learning_rate": 1.816875885340649e-05, "loss": 0.1918, "step": 2754 }, { "epoch": 0.2199249620819031, "grad_norm": 0.38919015913572497, "learning_rate": 1.8167267252126148e-05, "loss": 0.1808, "step": 2755 }, { "epoch": 0.2200047896543466, "grad_norm": 0.3753745999466151, "learning_rate": 1.816577510489631e-05, "loss": 0.2228, "step": 2756 }, { "epoch": 0.22008461722679012, "grad_norm": 0.3083435850415546, "learning_rate": 1.8164282411816718e-05, "loss": 0.1988, "step": 2757 }, { "epoch": 0.22016444479923367, "grad_norm": 0.2908950421546409, "learning_rate": 1.8162789172987162e-05, "loss": 0.1969, "step": 2758 }, { "epoch": 0.22024427237167718, "grad_norm": 0.37324494703826633, "learning_rate": 1.816129538850745e-05, "loss": 0.2326, "step": 2759 }, { "epoch": 0.2203240999441207, "grad_norm": 0.34460491577587266, "learning_rate": 1.815980105847744e-05, "loss": 0.2184, "step": 2760 }, { "epoch": 0.2204039275165642, "grad_norm": 0.26714202308444973, "learning_rate": 1.8158306182997025e-05, "loss": 0.2097, "step": 2761 }, { "epoch": 0.22048375508900775, "grad_norm": 0.4051855132015785, "learning_rate": 1.8156810762166124e-05, "loss": 0.205, "step": 2762 }, { "epoch": 0.22056358266145126, "grad_norm": 0.4929241098161814, "learning_rate": 1.8155314796084702e-05, "loss": 0.2006, "step": 2763 }, { "epoch": 0.22064341023389478, "grad_norm": 0.2993353565890143, "learning_rate": 1.815381828485276e-05, "loss": 0.2202, "step": 2764 }, { "epoch": 0.22072323780633832, "grad_norm": 0.3192648017098214, "learning_rate": 1.8152321228570336e-05, "loss": 0.1868, "step": 2765 }, { "epoch": 0.22080306537878183, "grad_norm": 0.41153680520883934, "learning_rate": 1.81508236273375e-05, "loss": 0.2006, "step": 2766 }, { "epoch": 0.22088289295122535, "grad_norm": 0.3695246539373747, "learning_rate": 1.8149325481254357e-05, "loss": 0.1985, "step": 2767 }, { "epoch": 0.2209627205236689, "grad_norm": 0.29586495179090133, "learning_rate": 1.814782679042106e-05, "loss": 0.189, "step": 2768 }, { "epoch": 0.2210425480961124, "grad_norm": 0.287810787395635, "learning_rate": 1.8146327554937782e-05, "loss": 0.2102, "step": 2769 }, { "epoch": 0.22112237566855591, "grad_norm": 0.3649714638888617, "learning_rate": 1.814482777490475e-05, "loss": 0.178, "step": 2770 }, { "epoch": 0.22120220324099943, "grad_norm": 0.3004190920685184, "learning_rate": 1.8143327450422214e-05, "loss": 0.2146, "step": 2771 }, { "epoch": 0.22128203081344297, "grad_norm": 0.40625436571462953, "learning_rate": 1.814182658159046e-05, "loss": 0.2585, "step": 2772 }, { "epoch": 0.22136185838588648, "grad_norm": 0.38199799849358934, "learning_rate": 1.8140325168509827e-05, "loss": 0.183, "step": 2773 }, { "epoch": 0.22144168595833, "grad_norm": 0.3159210229782994, "learning_rate": 1.8138823211280666e-05, "loss": 0.1914, "step": 2774 }, { "epoch": 0.22152151353077354, "grad_norm": 0.3810127443638702, "learning_rate": 1.8137320710003384e-05, "loss": 0.1742, "step": 2775 }, { "epoch": 0.22160134110321705, "grad_norm": 0.38929537483355064, "learning_rate": 1.8135817664778413e-05, "loss": 0.2096, "step": 2776 }, { "epoch": 0.22168116867566057, "grad_norm": 0.316077259522932, "learning_rate": 1.813431407570623e-05, "loss": 0.2219, "step": 2777 }, { "epoch": 0.2217609962481041, "grad_norm": 0.42855766177042265, "learning_rate": 1.813280994288734e-05, "loss": 0.2142, "step": 2778 }, { "epoch": 0.22184082382054762, "grad_norm": 0.42427469475721163, "learning_rate": 1.813130526642229e-05, "loss": 0.182, "step": 2779 }, { "epoch": 0.22192065139299114, "grad_norm": 0.3500136766751334, "learning_rate": 1.8129800046411666e-05, "loss": 0.1799, "step": 2780 }, { "epoch": 0.22200047896543465, "grad_norm": 0.35295732423270243, "learning_rate": 1.8128294282956077e-05, "loss": 0.2219, "step": 2781 }, { "epoch": 0.2220803065378782, "grad_norm": 0.3572492792069211, "learning_rate": 1.8126787976156186e-05, "loss": 0.1692, "step": 2782 }, { "epoch": 0.2221601341103217, "grad_norm": 0.3582957313930306, "learning_rate": 1.8125281126112678e-05, "loss": 0.2114, "step": 2783 }, { "epoch": 0.22223996168276522, "grad_norm": 0.3155321189448729, "learning_rate": 1.8123773732926283e-05, "loss": 0.1992, "step": 2784 }, { "epoch": 0.22231978925520876, "grad_norm": 0.3291438281011915, "learning_rate": 1.8122265796697762e-05, "loss": 0.2481, "step": 2785 }, { "epoch": 0.22239961682765227, "grad_norm": 0.40623528394284714, "learning_rate": 1.812075731752792e-05, "loss": 0.1747, "step": 2786 }, { "epoch": 0.2224794444000958, "grad_norm": 0.3807181248879087, "learning_rate": 1.8119248295517583e-05, "loss": 0.1597, "step": 2787 }, { "epoch": 0.2225592719725393, "grad_norm": 0.38042718127275077, "learning_rate": 1.811773873076763e-05, "loss": 0.2109, "step": 2788 }, { "epoch": 0.22263909954498284, "grad_norm": 0.35112689048608803, "learning_rate": 1.8116228623378967e-05, "loss": 0.2025, "step": 2789 }, { "epoch": 0.22271892711742636, "grad_norm": 0.33051045276191837, "learning_rate": 1.8114717973452545e-05, "loss": 0.2206, "step": 2790 }, { "epoch": 0.22279875468986987, "grad_norm": 0.3927606765902243, "learning_rate": 1.8113206781089333e-05, "loss": 0.148, "step": 2791 }, { "epoch": 0.2228785822623134, "grad_norm": 0.29836148516177174, "learning_rate": 1.811169504639036e-05, "loss": 0.1966, "step": 2792 }, { "epoch": 0.22295840983475693, "grad_norm": 0.2793484263968683, "learning_rate": 1.8110182769456675e-05, "loss": 0.2315, "step": 2793 }, { "epoch": 0.22303823740720044, "grad_norm": 0.2890303219084378, "learning_rate": 1.8108669950389365e-05, "loss": 0.2193, "step": 2794 }, { "epoch": 0.22311806497964398, "grad_norm": 0.3177010077813603, "learning_rate": 1.8107156589289557e-05, "loss": 0.2242, "step": 2795 }, { "epoch": 0.2231978925520875, "grad_norm": 0.3131393857053858, "learning_rate": 1.8105642686258416e-05, "loss": 0.1921, "step": 2796 }, { "epoch": 0.223277720124531, "grad_norm": 0.32006277879867295, "learning_rate": 1.810412824139714e-05, "loss": 0.2248, "step": 2797 }, { "epoch": 0.22335754769697452, "grad_norm": 0.33504941463004145, "learning_rate": 1.8102613254806962e-05, "loss": 0.203, "step": 2798 }, { "epoch": 0.22343737526941806, "grad_norm": 0.27980176108027416, "learning_rate": 1.8101097726589155e-05, "loss": 0.2799, "step": 2799 }, { "epoch": 0.22351720284186158, "grad_norm": 0.2866844406823502, "learning_rate": 1.809958165684502e-05, "loss": 0.1745, "step": 2800 }, { "epoch": 0.2235970304143051, "grad_norm": 0.32241677765161697, "learning_rate": 1.8098065045675906e-05, "loss": 0.1727, "step": 2801 }, { "epoch": 0.22367685798674863, "grad_norm": 0.3157062844346252, "learning_rate": 1.8096547893183192e-05, "loss": 0.2424, "step": 2802 }, { "epoch": 0.22375668555919215, "grad_norm": 0.3724259869141807, "learning_rate": 1.8095030199468294e-05, "loss": 0.2031, "step": 2803 }, { "epoch": 0.22383651313163566, "grad_norm": 0.3551347477262519, "learning_rate": 1.8093511964632663e-05, "loss": 0.2255, "step": 2804 }, { "epoch": 0.22391634070407918, "grad_norm": 0.3798904661894119, "learning_rate": 1.8091993188777785e-05, "loss": 0.1714, "step": 2805 }, { "epoch": 0.22399616827652272, "grad_norm": 0.30729311844378837, "learning_rate": 1.8090473872005185e-05, "loss": 0.1984, "step": 2806 }, { "epoch": 0.22407599584896623, "grad_norm": 0.27043180122806754, "learning_rate": 1.8088954014416423e-05, "loss": 0.1829, "step": 2807 }, { "epoch": 0.22415582342140974, "grad_norm": 0.38200112962245125, "learning_rate": 1.80874336161131e-05, "loss": 0.2675, "step": 2808 }, { "epoch": 0.22423565099385329, "grad_norm": 0.27278816967670316, "learning_rate": 1.8085912677196846e-05, "loss": 0.2525, "step": 2809 }, { "epoch": 0.2243154785662968, "grad_norm": 0.2774143066692551, "learning_rate": 1.8084391197769323e-05, "loss": 0.1724, "step": 2810 }, { "epoch": 0.2243953061387403, "grad_norm": 0.34592740783963544, "learning_rate": 1.8082869177932248e-05, "loss": 0.1716, "step": 2811 }, { "epoch": 0.22447513371118386, "grad_norm": 0.26583103010423, "learning_rate": 1.8081346617787354e-05, "loss": 0.2233, "step": 2812 }, { "epoch": 0.22455496128362737, "grad_norm": 0.29278911397555, "learning_rate": 1.8079823517436417e-05, "loss": 0.1911, "step": 2813 }, { "epoch": 0.22463478885607088, "grad_norm": 0.28677080047361847, "learning_rate": 1.8078299876981257e-05, "loss": 0.1925, "step": 2814 }, { "epoch": 0.2247146164285144, "grad_norm": 0.33586642672206807, "learning_rate": 1.8076775696523715e-05, "loss": 0.233, "step": 2815 }, { "epoch": 0.22479444400095794, "grad_norm": 0.2685019108038102, "learning_rate": 1.8075250976165684e-05, "loss": 0.2336, "step": 2816 }, { "epoch": 0.22487427157340145, "grad_norm": 0.29484368857980486, "learning_rate": 1.8073725716009084e-05, "loss": 0.1881, "step": 2817 }, { "epoch": 0.22495409914584497, "grad_norm": 0.32386582207189624, "learning_rate": 1.807219991615587e-05, "loss": 0.1854, "step": 2818 }, { "epoch": 0.2250339267182885, "grad_norm": 0.2393793742845292, "learning_rate": 1.8070673576708036e-05, "loss": 0.2271, "step": 2819 }, { "epoch": 0.22511375429073202, "grad_norm": 0.2786934601612454, "learning_rate": 1.806914669776761e-05, "loss": 0.2281, "step": 2820 }, { "epoch": 0.22519358186317553, "grad_norm": 0.28113033154631056, "learning_rate": 1.8067619279436662e-05, "loss": 0.1949, "step": 2821 }, { "epoch": 0.22527340943561908, "grad_norm": 0.35877470920255194, "learning_rate": 1.8066091321817295e-05, "loss": 0.1995, "step": 2822 }, { "epoch": 0.2253532370080626, "grad_norm": 0.28956607890646896, "learning_rate": 1.806456282501164e-05, "loss": 0.1966, "step": 2823 }, { "epoch": 0.2254330645805061, "grad_norm": 0.3177933196091401, "learning_rate": 1.8063033789121877e-05, "loss": 0.1863, "step": 2824 }, { "epoch": 0.22551289215294962, "grad_norm": 0.3221791181671026, "learning_rate": 1.8061504214250215e-05, "loss": 0.1501, "step": 2825 }, { "epoch": 0.22559271972539316, "grad_norm": 0.3203106348762647, "learning_rate": 1.80599741004989e-05, "loss": 0.2337, "step": 2826 }, { "epoch": 0.22567254729783667, "grad_norm": 0.34449111969643637, "learning_rate": 1.8058443447970207e-05, "loss": 0.1923, "step": 2827 }, { "epoch": 0.2257523748702802, "grad_norm": 0.31812790421605003, "learning_rate": 1.8056912256766467e-05, "loss": 0.1751, "step": 2828 }, { "epoch": 0.22583220244272373, "grad_norm": 0.35945054240193625, "learning_rate": 1.8055380526990024e-05, "loss": 0.2303, "step": 2829 }, { "epoch": 0.22591203001516724, "grad_norm": 0.3133964935484587, "learning_rate": 1.805384825874327e-05, "loss": 0.2149, "step": 2830 }, { "epoch": 0.22599185758761076, "grad_norm": 0.3599844550167939, "learning_rate": 1.8052315452128634e-05, "loss": 0.224, "step": 2831 }, { "epoch": 0.22607168516005427, "grad_norm": 0.3160048432517387, "learning_rate": 1.805078210724858e-05, "loss": 0.1951, "step": 2832 }, { "epoch": 0.2261515127324978, "grad_norm": 0.3636042650172429, "learning_rate": 1.8049248224205597e-05, "loss": 0.2094, "step": 2833 }, { "epoch": 0.22623134030494133, "grad_norm": 0.33283568656540785, "learning_rate": 1.8047713803102226e-05, "loss": 0.1833, "step": 2834 }, { "epoch": 0.22631116787738484, "grad_norm": 0.411984981011994, "learning_rate": 1.8046178844041037e-05, "loss": 0.1809, "step": 2835 }, { "epoch": 0.22639099544982838, "grad_norm": 0.25354715579949566, "learning_rate": 1.804464334712463e-05, "loss": 0.155, "step": 2836 }, { "epoch": 0.2264708230222719, "grad_norm": 0.28031044067788563, "learning_rate": 1.8043107312455657e-05, "loss": 0.2087, "step": 2837 }, { "epoch": 0.2265506505947154, "grad_norm": 0.32692584168553457, "learning_rate": 1.8041570740136788e-05, "loss": 0.2429, "step": 2838 }, { "epoch": 0.22663047816715895, "grad_norm": 0.32516470284366206, "learning_rate": 1.8040033630270738e-05, "loss": 0.1828, "step": 2839 }, { "epoch": 0.22671030573960246, "grad_norm": 0.3192100196108678, "learning_rate": 1.8038495982960258e-05, "loss": 0.2425, "step": 2840 }, { "epoch": 0.22679013331204598, "grad_norm": 0.35399617600664585, "learning_rate": 1.8036957798308135e-05, "loss": 0.176, "step": 2841 }, { "epoch": 0.2268699608844895, "grad_norm": 0.37923169194434336, "learning_rate": 1.8035419076417187e-05, "loss": 0.1861, "step": 2842 }, { "epoch": 0.22694978845693303, "grad_norm": 0.3569880362332447, "learning_rate": 1.8033879817390273e-05, "loss": 0.1906, "step": 2843 }, { "epoch": 0.22702961602937655, "grad_norm": 0.3080703192469126, "learning_rate": 1.8032340021330288e-05, "loss": 0.1688, "step": 2844 }, { "epoch": 0.22710944360182006, "grad_norm": 0.31773909273936446, "learning_rate": 1.803079968834016e-05, "loss": 0.2258, "step": 2845 }, { "epoch": 0.2271892711742636, "grad_norm": 0.2763471724648172, "learning_rate": 1.8029258818522854e-05, "loss": 0.1496, "step": 2846 }, { "epoch": 0.22726909874670712, "grad_norm": 0.3520448208132364, "learning_rate": 1.8027717411981366e-05, "loss": 0.1736, "step": 2847 }, { "epoch": 0.22734892631915063, "grad_norm": 0.34901643191312665, "learning_rate": 1.8026175468818745e-05, "loss": 0.2199, "step": 2848 }, { "epoch": 0.22742875389159414, "grad_norm": 0.3012595441095303, "learning_rate": 1.8024632989138053e-05, "loss": 0.1844, "step": 2849 }, { "epoch": 0.22750858146403768, "grad_norm": 0.48337612772525956, "learning_rate": 1.8023089973042407e-05, "loss": 0.1687, "step": 2850 }, { "epoch": 0.2275884090364812, "grad_norm": 0.33563464612428806, "learning_rate": 1.8021546420634945e-05, "loss": 0.1707, "step": 2851 }, { "epoch": 0.2276682366089247, "grad_norm": 0.32052539267109237, "learning_rate": 1.8020002332018845e-05, "loss": 0.2149, "step": 2852 }, { "epoch": 0.22774806418136825, "grad_norm": 0.3222532632143931, "learning_rate": 1.8018457707297332e-05, "loss": 0.1704, "step": 2853 }, { "epoch": 0.22782789175381177, "grad_norm": 0.37569298831327735, "learning_rate": 1.8016912546573654e-05, "loss": 0.2697, "step": 2854 }, { "epoch": 0.22790771932625528, "grad_norm": 0.2987679564996674, "learning_rate": 1.80153668499511e-05, "loss": 0.1865, "step": 2855 }, { "epoch": 0.22798754689869882, "grad_norm": 0.3704562226410122, "learning_rate": 1.8013820617532987e-05, "loss": 0.1824, "step": 2856 }, { "epoch": 0.22806737447114234, "grad_norm": 0.29796755468432057, "learning_rate": 1.8012273849422684e-05, "loss": 0.1814, "step": 2857 }, { "epoch": 0.22814720204358585, "grad_norm": 0.30144673660757176, "learning_rate": 1.8010726545723576e-05, "loss": 0.1825, "step": 2858 }, { "epoch": 0.22822702961602936, "grad_norm": 0.34456087875021285, "learning_rate": 1.800917870653911e-05, "loss": 0.1706, "step": 2859 }, { "epoch": 0.2283068571884729, "grad_norm": 0.27165087472902716, "learning_rate": 1.8007630331972733e-05, "loss": 0.1851, "step": 2860 }, { "epoch": 0.22838668476091642, "grad_norm": 0.2783211542341563, "learning_rate": 1.8006081422127965e-05, "loss": 0.1933, "step": 2861 }, { "epoch": 0.22846651233335993, "grad_norm": 0.35804816035363934, "learning_rate": 1.8004531977108336e-05, "loss": 0.1961, "step": 2862 }, { "epoch": 0.22854633990580348, "grad_norm": 0.33153351611370885, "learning_rate": 1.8002981997017422e-05, "loss": 0.1423, "step": 2863 }, { "epoch": 0.228626167478247, "grad_norm": 0.3545980114301692, "learning_rate": 1.800143148195883e-05, "loss": 0.1959, "step": 2864 }, { "epoch": 0.2287059950506905, "grad_norm": 0.38144180325387295, "learning_rate": 1.799988043203621e-05, "loss": 0.1941, "step": 2865 }, { "epoch": 0.22878582262313404, "grad_norm": 0.3164606384107798, "learning_rate": 1.799832884735324e-05, "loss": 0.2286, "step": 2866 }, { "epoch": 0.22886565019557756, "grad_norm": 0.36457012646837833, "learning_rate": 1.7996776728013645e-05, "loss": 0.1983, "step": 2867 }, { "epoch": 0.22894547776802107, "grad_norm": 0.34570045835802593, "learning_rate": 1.7995224074121165e-05, "loss": 0.2311, "step": 2868 }, { "epoch": 0.22902530534046459, "grad_norm": 0.28589751416914577, "learning_rate": 1.79936708857796e-05, "loss": 0.1716, "step": 2869 }, { "epoch": 0.22910513291290813, "grad_norm": 0.2914573086938461, "learning_rate": 1.7992117163092772e-05, "loss": 0.1833, "step": 2870 }, { "epoch": 0.22918496048535164, "grad_norm": 0.38288137111792686, "learning_rate": 1.799056290616454e-05, "loss": 0.218, "step": 2871 }, { "epoch": 0.22926478805779515, "grad_norm": 0.34304375664723397, "learning_rate": 1.7989008115098796e-05, "loss": 0.1859, "step": 2872 }, { "epoch": 0.2293446156302387, "grad_norm": 0.2858869699744473, "learning_rate": 1.7987452789999477e-05, "loss": 0.2022, "step": 2873 }, { "epoch": 0.2294244432026822, "grad_norm": 0.31995708392086847, "learning_rate": 1.7985896930970554e-05, "loss": 0.1958, "step": 2874 }, { "epoch": 0.22950427077512572, "grad_norm": 0.3332541805130696, "learning_rate": 1.7984340538116016e-05, "loss": 0.1896, "step": 2875 }, { "epoch": 0.22958409834756924, "grad_norm": 0.3054348896682483, "learning_rate": 1.7982783611539916e-05, "loss": 0.1805, "step": 2876 }, { "epoch": 0.22966392592001278, "grad_norm": 0.3657862040364184, "learning_rate": 1.798122615134632e-05, "loss": 0.1952, "step": 2877 }, { "epoch": 0.2297437534924563, "grad_norm": 0.3543614253561974, "learning_rate": 1.7979668157639343e-05, "loss": 0.2304, "step": 2878 }, { "epoch": 0.2298235810648998, "grad_norm": 0.3607858516408698, "learning_rate": 1.7978109630523128e-05, "loss": 0.2307, "step": 2879 }, { "epoch": 0.22990340863734335, "grad_norm": 0.3588192547870524, "learning_rate": 1.7976550570101854e-05, "loss": 0.1942, "step": 2880 }, { "epoch": 0.22998323620978686, "grad_norm": 0.3388650855753529, "learning_rate": 1.7974990976479744e-05, "loss": 0.1832, "step": 2881 }, { "epoch": 0.23006306378223038, "grad_norm": 0.3303031417655938, "learning_rate": 1.7973430849761046e-05, "loss": 0.1847, "step": 2882 }, { "epoch": 0.23014289135467392, "grad_norm": 0.35306565303820875, "learning_rate": 1.7971870190050054e-05, "loss": 0.2524, "step": 2883 }, { "epoch": 0.23022271892711743, "grad_norm": 0.29793693596859055, "learning_rate": 1.7970308997451082e-05, "loss": 0.2046, "step": 2884 }, { "epoch": 0.23030254649956095, "grad_norm": 0.26396337986028423, "learning_rate": 1.79687472720685e-05, "loss": 0.2092, "step": 2885 }, { "epoch": 0.23038237407200446, "grad_norm": 0.3106871840885874, "learning_rate": 1.7967185014006698e-05, "loss": 0.2013, "step": 2886 }, { "epoch": 0.230462201644448, "grad_norm": 0.3667029602962845, "learning_rate": 1.7965622223370106e-05, "loss": 0.186, "step": 2887 }, { "epoch": 0.23054202921689151, "grad_norm": 0.31375301482055423, "learning_rate": 1.796405890026319e-05, "loss": 0.1905, "step": 2888 }, { "epoch": 0.23062185678933503, "grad_norm": 0.2773720072680311, "learning_rate": 1.7962495044790458e-05, "loss": 0.1838, "step": 2889 }, { "epoch": 0.23070168436177857, "grad_norm": 0.32528730039353543, "learning_rate": 1.796093065705644e-05, "loss": 0.2349, "step": 2890 }, { "epoch": 0.23078151193422208, "grad_norm": 0.3789338180520869, "learning_rate": 1.7959365737165714e-05, "loss": 0.1992, "step": 2891 }, { "epoch": 0.2308613395066656, "grad_norm": 0.3411188522849767, "learning_rate": 1.7957800285222884e-05, "loss": 0.2473, "step": 2892 }, { "epoch": 0.2309411670791091, "grad_norm": 0.39449624166462305, "learning_rate": 1.79562343013326e-05, "loss": 0.2408, "step": 2893 }, { "epoch": 0.23102099465155265, "grad_norm": 0.3064991444281506, "learning_rate": 1.7954667785599537e-05, "loss": 0.2259, "step": 2894 }, { "epoch": 0.23110082222399617, "grad_norm": 0.2672318871250462, "learning_rate": 1.7953100738128416e-05, "loss": 0.1966, "step": 2895 }, { "epoch": 0.23118064979643968, "grad_norm": 0.28911620883613626, "learning_rate": 1.795153315902398e-05, "loss": 0.2289, "step": 2896 }, { "epoch": 0.23126047736888322, "grad_norm": 0.3548632841875156, "learning_rate": 1.7949965048391026e-05, "loss": 0.167, "step": 2897 }, { "epoch": 0.23134030494132674, "grad_norm": 0.36231276232188786, "learning_rate": 1.7948396406334364e-05, "loss": 0.2115, "step": 2898 }, { "epoch": 0.23142013251377025, "grad_norm": 0.30815585661231243, "learning_rate": 1.7946827232958858e-05, "loss": 0.212, "step": 2899 }, { "epoch": 0.2314999600862138, "grad_norm": 0.33616372803086625, "learning_rate": 1.7945257528369406e-05, "loss": 0.2085, "step": 2900 }, { "epoch": 0.2315797876586573, "grad_norm": 0.4034712130059999, "learning_rate": 1.7943687292670927e-05, "loss": 0.1817, "step": 2901 }, { "epoch": 0.23165961523110082, "grad_norm": 0.30224950257344646, "learning_rate": 1.7942116525968388e-05, "loss": 0.176, "step": 2902 }, { "epoch": 0.23173944280354433, "grad_norm": 0.30387038523133686, "learning_rate": 1.794054522836679e-05, "loss": 0.1953, "step": 2903 }, { "epoch": 0.23181927037598787, "grad_norm": 0.349224992826272, "learning_rate": 1.793897339997117e-05, "loss": 0.2559, "step": 2904 }, { "epoch": 0.2318990979484314, "grad_norm": 0.3415490047571713, "learning_rate": 1.7937401040886594e-05, "loss": 0.175, "step": 2905 }, { "epoch": 0.2319789255208749, "grad_norm": 0.3636879772136909, "learning_rate": 1.7935828151218168e-05, "loss": 0.2187, "step": 2906 }, { "epoch": 0.23205875309331844, "grad_norm": 0.3406248154894015, "learning_rate": 1.793425473107104e-05, "loss": 0.2043, "step": 2907 }, { "epoch": 0.23213858066576196, "grad_norm": 0.33300605025610663, "learning_rate": 1.793268078055038e-05, "loss": 0.1902, "step": 2908 }, { "epoch": 0.23221840823820547, "grad_norm": 0.35273045809378645, "learning_rate": 1.7931106299761404e-05, "loss": 0.1828, "step": 2909 }, { "epoch": 0.232298235810649, "grad_norm": 0.3121907024293119, "learning_rate": 1.7929531288809358e-05, "loss": 0.1912, "step": 2910 }, { "epoch": 0.23237806338309253, "grad_norm": 0.2823921836921022, "learning_rate": 1.7927955747799526e-05, "loss": 0.1667, "step": 2911 }, { "epoch": 0.23245789095553604, "grad_norm": 0.32165492089440384, "learning_rate": 1.7926379676837226e-05, "loss": 0.1819, "step": 2912 }, { "epoch": 0.23253771852797955, "grad_norm": 0.28968811924594795, "learning_rate": 1.792480307602781e-05, "loss": 0.1825, "step": 2913 }, { "epoch": 0.2326175461004231, "grad_norm": 0.3313701952641492, "learning_rate": 1.7923225945476675e-05, "loss": 0.1729, "step": 2914 }, { "epoch": 0.2326973736728666, "grad_norm": 0.31779181287082814, "learning_rate": 1.792164828528924e-05, "loss": 0.1927, "step": 2915 }, { "epoch": 0.23277720124531012, "grad_norm": 0.3519040754608538, "learning_rate": 1.7920070095570964e-05, "loss": 0.2124, "step": 2916 }, { "epoch": 0.23285702881775366, "grad_norm": 0.30521431444665753, "learning_rate": 1.7918491376427344e-05, "loss": 0.2041, "step": 2917 }, { "epoch": 0.23293685639019718, "grad_norm": 0.38185805149849544, "learning_rate": 1.7916912127963914e-05, "loss": 0.2212, "step": 2918 }, { "epoch": 0.2330166839626407, "grad_norm": 0.3244128738216239, "learning_rate": 1.7915332350286242e-05, "loss": 0.1543, "step": 2919 }, { "epoch": 0.2330965115350842, "grad_norm": 0.3013577005098662, "learning_rate": 1.7913752043499926e-05, "loss": 0.2003, "step": 2920 }, { "epoch": 0.23317633910752775, "grad_norm": 0.39687111466893626, "learning_rate": 1.79121712077106e-05, "loss": 0.1991, "step": 2921 }, { "epoch": 0.23325616667997126, "grad_norm": 0.30247908356947384, "learning_rate": 1.7910589843023945e-05, "loss": 0.2057, "step": 2922 }, { "epoch": 0.23333599425241477, "grad_norm": 0.29495355851938737, "learning_rate": 1.7909007949545662e-05, "loss": 0.1798, "step": 2923 }, { "epoch": 0.23341582182485832, "grad_norm": 0.27909133057843133, "learning_rate": 1.7907425527381496e-05, "loss": 0.2209, "step": 2924 }, { "epoch": 0.23349564939730183, "grad_norm": 0.3730001667056088, "learning_rate": 1.790584257663723e-05, "loss": 0.1415, "step": 2925 }, { "epoch": 0.23357547696974534, "grad_norm": 0.41263510984482216, "learning_rate": 1.790425909741867e-05, "loss": 0.1747, "step": 2926 }, { "epoch": 0.23365530454218889, "grad_norm": 0.3074380243522797, "learning_rate": 1.7902675089831675e-05, "loss": 0.2169, "step": 2927 }, { "epoch": 0.2337351321146324, "grad_norm": 0.30407592289788116, "learning_rate": 1.790109055398212e-05, "loss": 0.1991, "step": 2928 }, { "epoch": 0.2338149596870759, "grad_norm": 0.26487033374321756, "learning_rate": 1.789950548997593e-05, "loss": 0.2097, "step": 2929 }, { "epoch": 0.23389478725951943, "grad_norm": 0.29286583700383906, "learning_rate": 1.789791989791906e-05, "loss": 0.1989, "step": 2930 }, { "epoch": 0.23397461483196297, "grad_norm": 0.2911005246986667, "learning_rate": 1.7896333777917502e-05, "loss": 0.183, "step": 2931 }, { "epoch": 0.23405444240440648, "grad_norm": 0.37817653257522366, "learning_rate": 1.789474713007728e-05, "loss": 0.2412, "step": 2932 }, { "epoch": 0.23413426997685, "grad_norm": 0.378218832530079, "learning_rate": 1.789315995450445e-05, "loss": 0.2049, "step": 2933 }, { "epoch": 0.23421409754929354, "grad_norm": 0.2870253071306374, "learning_rate": 1.789157225130512e-05, "loss": 0.1579, "step": 2934 }, { "epoch": 0.23429392512173705, "grad_norm": 0.3403483675868003, "learning_rate": 1.7889984020585417e-05, "loss": 0.1573, "step": 2935 }, { "epoch": 0.23437375269418056, "grad_norm": 0.33185502231610986, "learning_rate": 1.78883952624515e-05, "loss": 0.2346, "step": 2936 }, { "epoch": 0.23445358026662408, "grad_norm": 0.3140655529190342, "learning_rate": 1.788680597700958e-05, "loss": 0.1879, "step": 2937 }, { "epoch": 0.23453340783906762, "grad_norm": 0.3636391684902548, "learning_rate": 1.788521616436589e-05, "loss": 0.1923, "step": 2938 }, { "epoch": 0.23461323541151113, "grad_norm": 0.3811252058191763, "learning_rate": 1.7883625824626708e-05, "loss": 0.1806, "step": 2939 }, { "epoch": 0.23469306298395465, "grad_norm": 0.3064983536454124, "learning_rate": 1.7882034957898334e-05, "loss": 0.2205, "step": 2940 }, { "epoch": 0.2347728905563982, "grad_norm": 0.39813516469740845, "learning_rate": 1.788044356428712e-05, "loss": 0.1877, "step": 2941 }, { "epoch": 0.2348527181288417, "grad_norm": 0.3038353998881489, "learning_rate": 1.787885164389944e-05, "loss": 0.1571, "step": 2942 }, { "epoch": 0.23493254570128522, "grad_norm": 0.30768212898876923, "learning_rate": 1.7877259196841702e-05, "loss": 0.1937, "step": 2943 }, { "epoch": 0.23501237327372876, "grad_norm": 0.3728617143261402, "learning_rate": 1.7875666223220367e-05, "loss": 0.198, "step": 2944 }, { "epoch": 0.23509220084617227, "grad_norm": 0.34957438331132623, "learning_rate": 1.787407272314191e-05, "loss": 0.2229, "step": 2945 }, { "epoch": 0.23517202841861579, "grad_norm": 0.32324964770092357, "learning_rate": 1.787247869671285e-05, "loss": 0.2092, "step": 2946 }, { "epoch": 0.2352518559910593, "grad_norm": 0.301749858865269, "learning_rate": 1.7870884144039747e-05, "loss": 0.2218, "step": 2947 }, { "epoch": 0.23533168356350284, "grad_norm": 0.33620459018810817, "learning_rate": 1.7869289065229186e-05, "loss": 0.21, "step": 2948 }, { "epoch": 0.23541151113594636, "grad_norm": 0.3669298991301133, "learning_rate": 1.786769346038779e-05, "loss": 0.2017, "step": 2949 }, { "epoch": 0.23549133870838987, "grad_norm": 0.3647698454964786, "learning_rate": 1.7866097329622228e-05, "loss": 0.1848, "step": 2950 }, { "epoch": 0.2355711662808334, "grad_norm": 0.2907784523422708, "learning_rate": 1.7864500673039185e-05, "loss": 0.2056, "step": 2951 }, { "epoch": 0.23565099385327692, "grad_norm": 0.3199801905778575, "learning_rate": 1.7862903490745397e-05, "loss": 0.2506, "step": 2952 }, { "epoch": 0.23573082142572044, "grad_norm": 0.34637068990126746, "learning_rate": 1.7861305782847628e-05, "loss": 0.2443, "step": 2953 }, { "epoch": 0.23581064899816395, "grad_norm": 0.29607121297389827, "learning_rate": 1.7859707549452675e-05, "loss": 0.2282, "step": 2954 }, { "epoch": 0.2358904765706075, "grad_norm": 0.33970915217549735, "learning_rate": 1.785810879066738e-05, "loss": 0.2145, "step": 2955 }, { "epoch": 0.235970304143051, "grad_norm": 0.2884686370917457, "learning_rate": 1.7856509506598607e-05, "loss": 0.2081, "step": 2956 }, { "epoch": 0.23605013171549452, "grad_norm": 0.37611069988449497, "learning_rate": 1.7854909697353266e-05, "loss": 0.1856, "step": 2957 }, { "epoch": 0.23612995928793806, "grad_norm": 0.4498567789427179, "learning_rate": 1.78533093630383e-05, "loss": 0.2136, "step": 2958 }, { "epoch": 0.23620978686038158, "grad_norm": 0.3194272254892143, "learning_rate": 1.7851708503760678e-05, "loss": 0.1761, "step": 2959 }, { "epoch": 0.2362896144328251, "grad_norm": 0.3240335614461723, "learning_rate": 1.7850107119627415e-05, "loss": 0.204, "step": 2960 }, { "epoch": 0.23636944200526863, "grad_norm": 0.3872050196018681, "learning_rate": 1.784850521074556e-05, "loss": 0.2004, "step": 2961 }, { "epoch": 0.23644926957771215, "grad_norm": 0.31163877369632564, "learning_rate": 1.7846902777222188e-05, "loss": 0.2032, "step": 2962 }, { "epoch": 0.23652909715015566, "grad_norm": 0.2794056403967729, "learning_rate": 1.7845299819164422e-05, "loss": 0.1943, "step": 2963 }, { "epoch": 0.23660892472259917, "grad_norm": 0.3272627977981584, "learning_rate": 1.7843696336679407e-05, "loss": 0.1808, "step": 2964 }, { "epoch": 0.23668875229504271, "grad_norm": 0.36426137128680186, "learning_rate": 1.7842092329874336e-05, "loss": 0.2315, "step": 2965 }, { "epoch": 0.23676857986748623, "grad_norm": 0.34272558818175103, "learning_rate": 1.7840487798856428e-05, "loss": 0.2198, "step": 2966 }, { "epoch": 0.23684840743992974, "grad_norm": 0.32497417057493655, "learning_rate": 1.7838882743732936e-05, "loss": 0.2029, "step": 2967 }, { "epoch": 0.23692823501237328, "grad_norm": 0.28211933162302916, "learning_rate": 1.7837277164611156e-05, "loss": 0.1978, "step": 2968 }, { "epoch": 0.2370080625848168, "grad_norm": 0.2958779457882624, "learning_rate": 1.783567106159841e-05, "loss": 0.1697, "step": 2969 }, { "epoch": 0.2370878901572603, "grad_norm": 0.3037067856729293, "learning_rate": 1.7834064434802067e-05, "loss": 0.1795, "step": 2970 }, { "epoch": 0.23716771772970385, "grad_norm": 0.3014483778635643, "learning_rate": 1.7832457284329517e-05, "loss": 0.2045, "step": 2971 }, { "epoch": 0.23724754530214737, "grad_norm": 0.2709359795110745, "learning_rate": 1.7830849610288193e-05, "loss": 0.1862, "step": 2972 }, { "epoch": 0.23732737287459088, "grad_norm": 0.3031152841758031, "learning_rate": 1.7829241412785563e-05, "loss": 0.2206, "step": 2973 }, { "epoch": 0.2374072004470344, "grad_norm": 0.2570740315220999, "learning_rate": 1.782763269192913e-05, "loss": 0.2158, "step": 2974 }, { "epoch": 0.23748702801947794, "grad_norm": 0.3393092609226804, "learning_rate": 1.7826023447826426e-05, "loss": 0.196, "step": 2975 }, { "epoch": 0.23756685559192145, "grad_norm": 0.3233989339219855, "learning_rate": 1.7824413680585027e-05, "loss": 0.2303, "step": 2976 }, { "epoch": 0.23764668316436496, "grad_norm": 0.3533277656566192, "learning_rate": 1.7822803390312538e-05, "loss": 0.2158, "step": 2977 }, { "epoch": 0.2377265107368085, "grad_norm": 0.29229040720410765, "learning_rate": 1.78211925771166e-05, "loss": 0.2017, "step": 2978 }, { "epoch": 0.23780633830925202, "grad_norm": 0.31949700698261974, "learning_rate": 1.7819581241104892e-05, "loss": 0.1741, "step": 2979 }, { "epoch": 0.23788616588169553, "grad_norm": 0.35911261483934687, "learning_rate": 1.781796938238512e-05, "loss": 0.2319, "step": 2980 }, { "epoch": 0.23796599345413905, "grad_norm": 0.3698645727751469, "learning_rate": 1.7816357001065036e-05, "loss": 0.1858, "step": 2981 }, { "epoch": 0.2380458210265826, "grad_norm": 0.29048645952508173, "learning_rate": 1.7814744097252418e-05, "loss": 0.2106, "step": 2982 }, { "epoch": 0.2381256485990261, "grad_norm": 0.29795792332139465, "learning_rate": 1.7813130671055082e-05, "loss": 0.1752, "step": 2983 }, { "epoch": 0.23820547617146962, "grad_norm": 0.2758193244509528, "learning_rate": 1.7811516722580884e-05, "loss": 0.2036, "step": 2984 }, { "epoch": 0.23828530374391316, "grad_norm": 0.298613144521083, "learning_rate": 1.7809902251937705e-05, "loss": 0.1988, "step": 2985 }, { "epoch": 0.23836513131635667, "grad_norm": 0.33545510216989916, "learning_rate": 1.780828725923347e-05, "loss": 0.235, "step": 2986 }, { "epoch": 0.23844495888880018, "grad_norm": 0.29478497308766044, "learning_rate": 1.7806671744576127e-05, "loss": 0.2595, "step": 2987 }, { "epoch": 0.23852478646124373, "grad_norm": 0.27395150527797163, "learning_rate": 1.7805055708073676e-05, "loss": 0.2019, "step": 2988 }, { "epoch": 0.23860461403368724, "grad_norm": 0.3331190482567619, "learning_rate": 1.7803439149834138e-05, "loss": 0.2196, "step": 2989 }, { "epoch": 0.23868444160613075, "grad_norm": 0.33859483281201896, "learning_rate": 1.780182206996557e-05, "loss": 0.2198, "step": 2990 }, { "epoch": 0.23876426917857427, "grad_norm": 0.3104282780211496, "learning_rate": 1.780020446857608e-05, "loss": 0.1744, "step": 2991 }, { "epoch": 0.2388440967510178, "grad_norm": 0.2921248397333817, "learning_rate": 1.7798586345773785e-05, "loss": 0.2326, "step": 2992 }, { "epoch": 0.23892392432346132, "grad_norm": 0.31075849838482034, "learning_rate": 1.7796967701666856e-05, "loss": 0.1647, "step": 2993 }, { "epoch": 0.23900375189590484, "grad_norm": 0.2818574832425733, "learning_rate": 1.7795348536363492e-05, "loss": 0.2111, "step": 2994 }, { "epoch": 0.23908357946834838, "grad_norm": 0.26627759581918437, "learning_rate": 1.779372884997193e-05, "loss": 0.1873, "step": 2995 }, { "epoch": 0.2391634070407919, "grad_norm": 0.3105036989367001, "learning_rate": 1.779210864260043e-05, "loss": 0.1796, "step": 2996 }, { "epoch": 0.2392432346132354, "grad_norm": 0.2923460787107773, "learning_rate": 1.7790487914357314e-05, "loss": 0.1507, "step": 2997 }, { "epoch": 0.23932306218567892, "grad_norm": 0.3430986728384233, "learning_rate": 1.7788866665350904e-05, "loss": 0.1782, "step": 2998 }, { "epoch": 0.23940288975812246, "grad_norm": 0.3659261218391485, "learning_rate": 1.7787244895689584e-05, "loss": 0.203, "step": 2999 }, { "epoch": 0.23948271733056598, "grad_norm": 0.33004626819262844, "learning_rate": 1.778562260548176e-05, "loss": 0.1755, "step": 3000 }, { "epoch": 0.2395625449030095, "grad_norm": 0.35132126675447256, "learning_rate": 1.7783999794835875e-05, "loss": 0.2564, "step": 3001 }, { "epoch": 0.23964237247545303, "grad_norm": 0.3486825570874051, "learning_rate": 1.778237646386041e-05, "loss": 0.2039, "step": 3002 }, { "epoch": 0.23972220004789654, "grad_norm": 0.35324630876216523, "learning_rate": 1.7780752612663877e-05, "loss": 0.1552, "step": 3003 }, { "epoch": 0.23980202762034006, "grad_norm": 0.28732395952385625, "learning_rate": 1.777912824135482e-05, "loss": 0.2274, "step": 3004 }, { "epoch": 0.2398818551927836, "grad_norm": 0.3786059885211332, "learning_rate": 1.777750335004183e-05, "loss": 0.2009, "step": 3005 }, { "epoch": 0.2399616827652271, "grad_norm": 0.3346462407609308, "learning_rate": 1.7775877938833518e-05, "loss": 0.1728, "step": 3006 }, { "epoch": 0.24004151033767063, "grad_norm": 0.3008543659788963, "learning_rate": 1.7774252007838537e-05, "loss": 0.2121, "step": 3007 }, { "epoch": 0.24012133791011414, "grad_norm": 0.284170820994895, "learning_rate": 1.7772625557165572e-05, "loss": 0.2226, "step": 3008 }, { "epoch": 0.24020116548255768, "grad_norm": 0.3135026634645097, "learning_rate": 1.7770998586923354e-05, "loss": 0.1716, "step": 3009 }, { "epoch": 0.2402809930550012, "grad_norm": 0.3016138604407625, "learning_rate": 1.7769371097220633e-05, "loss": 0.2072, "step": 3010 }, { "epoch": 0.2403608206274447, "grad_norm": 0.26810991232389303, "learning_rate": 1.7767743088166196e-05, "loss": 0.1621, "step": 3011 }, { "epoch": 0.24044064819988825, "grad_norm": 0.2661378711768829, "learning_rate": 1.776611455986888e-05, "loss": 0.1615, "step": 3012 }, { "epoch": 0.24052047577233177, "grad_norm": 0.29766983079602777, "learning_rate": 1.7764485512437536e-05, "loss": 0.2366, "step": 3013 }, { "epoch": 0.24060030334477528, "grad_norm": 0.2997954877244403, "learning_rate": 1.7762855945981062e-05, "loss": 0.169, "step": 3014 }, { "epoch": 0.24068013091721882, "grad_norm": 0.32368665984666734, "learning_rate": 1.7761225860608392e-05, "loss": 0.2524, "step": 3015 }, { "epoch": 0.24075995848966233, "grad_norm": 0.3065261229162094, "learning_rate": 1.7759595256428483e-05, "loss": 0.175, "step": 3016 }, { "epoch": 0.24083978606210585, "grad_norm": 0.43922735743858565, "learning_rate": 1.7757964133550343e-05, "loss": 0.1832, "step": 3017 }, { "epoch": 0.24091961363454936, "grad_norm": 0.40796777119796046, "learning_rate": 1.7756332492083e-05, "loss": 0.2049, "step": 3018 }, { "epoch": 0.2409994412069929, "grad_norm": 0.30791057292283797, "learning_rate": 1.7754700332135528e-05, "loss": 0.1846, "step": 3019 }, { "epoch": 0.24107926877943642, "grad_norm": 0.292670311271502, "learning_rate": 1.7753067653817025e-05, "loss": 0.1848, "step": 3020 }, { "epoch": 0.24115909635187993, "grad_norm": 0.32431106818393257, "learning_rate": 1.7751434457236632e-05, "loss": 0.217, "step": 3021 }, { "epoch": 0.24123892392432347, "grad_norm": 0.3062486082985241, "learning_rate": 1.7749800742503522e-05, "loss": 0.1683, "step": 3022 }, { "epoch": 0.241318751496767, "grad_norm": 0.34195454004046943, "learning_rate": 1.7748166509726902e-05, "loss": 0.1954, "step": 3023 }, { "epoch": 0.2413985790692105, "grad_norm": 0.32949853787476724, "learning_rate": 1.7746531759016014e-05, "loss": 0.2166, "step": 3024 }, { "epoch": 0.24147840664165401, "grad_norm": 0.31757777196130266, "learning_rate": 1.7744896490480133e-05, "loss": 0.1487, "step": 3025 }, { "epoch": 0.24155823421409756, "grad_norm": 0.30596258767374396, "learning_rate": 1.774326070422857e-05, "loss": 0.1967, "step": 3026 }, { "epoch": 0.24163806178654107, "grad_norm": 0.30953277436303844, "learning_rate": 1.7741624400370674e-05, "loss": 0.1937, "step": 3027 }, { "epoch": 0.24171788935898458, "grad_norm": 0.3293076013151342, "learning_rate": 1.7739987579015823e-05, "loss": 0.1923, "step": 3028 }, { "epoch": 0.24179771693142812, "grad_norm": 0.2930699254023919, "learning_rate": 1.773835024027343e-05, "loss": 0.2188, "step": 3029 }, { "epoch": 0.24187754450387164, "grad_norm": 0.3332668828005155, "learning_rate": 1.773671238425295e-05, "loss": 0.1868, "step": 3030 }, { "epoch": 0.24195737207631515, "grad_norm": 0.37317943217901095, "learning_rate": 1.7735074011063868e-05, "loss": 0.1876, "step": 3031 }, { "epoch": 0.2420371996487587, "grad_norm": 0.3692359362506228, "learning_rate": 1.7733435120815695e-05, "loss": 0.1813, "step": 3032 }, { "epoch": 0.2421170272212022, "grad_norm": 0.3514928782679999, "learning_rate": 1.773179571361799e-05, "loss": 0.169, "step": 3033 }, { "epoch": 0.24219685479364572, "grad_norm": 0.3472781942861378, "learning_rate": 1.773015578958034e-05, "loss": 0.2319, "step": 3034 }, { "epoch": 0.24227668236608924, "grad_norm": 0.3106037883301204, "learning_rate": 1.7728515348812365e-05, "loss": 0.2167, "step": 3035 }, { "epoch": 0.24235650993853278, "grad_norm": 0.37255181729751546, "learning_rate": 1.7726874391423727e-05, "loss": 0.1494, "step": 3036 }, { "epoch": 0.2424363375109763, "grad_norm": 0.35878751532873643, "learning_rate": 1.7725232917524112e-05, "loss": 0.1777, "step": 3037 }, { "epoch": 0.2425161650834198, "grad_norm": 0.37042516798289143, "learning_rate": 1.7723590927223247e-05, "loss": 0.1716, "step": 3038 }, { "epoch": 0.24259599265586335, "grad_norm": 0.3242770744996797, "learning_rate": 1.77219484206309e-05, "loss": 0.2249, "step": 3039 }, { "epoch": 0.24267582022830686, "grad_norm": 0.3360135385379344, "learning_rate": 1.7720305397856855e-05, "loss": 0.1684, "step": 3040 }, { "epoch": 0.24275564780075037, "grad_norm": 0.3971804872261624, "learning_rate": 1.771866185901095e-05, "loss": 0.1712, "step": 3041 }, { "epoch": 0.2428354753731939, "grad_norm": 0.36993401258209635, "learning_rate": 1.771701780420304e-05, "loss": 0.1733, "step": 3042 }, { "epoch": 0.24291530294563743, "grad_norm": 0.3276074433703249, "learning_rate": 1.7715373233543032e-05, "loss": 0.1817, "step": 3043 }, { "epoch": 0.24299513051808094, "grad_norm": 0.37873760275358104, "learning_rate": 1.7713728147140857e-05, "loss": 0.1814, "step": 3044 }, { "epoch": 0.24307495809052446, "grad_norm": 0.3662151173223893, "learning_rate": 1.7712082545106485e-05, "loss": 0.2185, "step": 3045 }, { "epoch": 0.243154785662968, "grad_norm": 0.37221209033361, "learning_rate": 1.7710436427549913e-05, "loss": 0.2174, "step": 3046 }, { "epoch": 0.2432346132354115, "grad_norm": 0.35607967462104567, "learning_rate": 1.7708789794581176e-05, "loss": 0.2318, "step": 3047 }, { "epoch": 0.24331444080785503, "grad_norm": 0.4063344095228948, "learning_rate": 1.770714264631035e-05, "loss": 0.208, "step": 3048 }, { "epoch": 0.24339426838029857, "grad_norm": 0.3460883261453932, "learning_rate": 1.770549498284754e-05, "loss": 0.2112, "step": 3049 }, { "epoch": 0.24347409595274208, "grad_norm": 0.33351277821344233, "learning_rate": 1.7703846804302883e-05, "loss": 0.2251, "step": 3050 }, { "epoch": 0.2435539235251856, "grad_norm": 0.3550453253761195, "learning_rate": 1.7702198110786555e-05, "loss": 0.2232, "step": 3051 }, { "epoch": 0.2436337510976291, "grad_norm": 0.30923969969865783, "learning_rate": 1.7700548902408762e-05, "loss": 0.1689, "step": 3052 }, { "epoch": 0.24371357867007265, "grad_norm": 0.29651136860250277, "learning_rate": 1.7698899179279753e-05, "loss": 0.1666, "step": 3053 }, { "epoch": 0.24379340624251616, "grad_norm": 0.3002272133512093, "learning_rate": 1.7697248941509798e-05, "loss": 0.2196, "step": 3054 }, { "epoch": 0.24387323381495968, "grad_norm": 0.3745264397102359, "learning_rate": 1.7695598189209215e-05, "loss": 0.2352, "step": 3055 }, { "epoch": 0.24395306138740322, "grad_norm": 0.29311480737458423, "learning_rate": 1.769394692248835e-05, "loss": 0.1554, "step": 3056 }, { "epoch": 0.24403288895984673, "grad_norm": 0.35932862119078557, "learning_rate": 1.7692295141457575e-05, "loss": 0.1602, "step": 3057 }, { "epoch": 0.24411271653229025, "grad_norm": 0.32176999994086675, "learning_rate": 1.7690642846227315e-05, "loss": 0.1808, "step": 3058 }, { "epoch": 0.2441925441047338, "grad_norm": 0.3209999411740814, "learning_rate": 1.768899003690802e-05, "loss": 0.2391, "step": 3059 }, { "epoch": 0.2442723716771773, "grad_norm": 0.3153666928929671, "learning_rate": 1.768733671361016e-05, "loss": 0.2083, "step": 3060 }, { "epoch": 0.24435219924962082, "grad_norm": 0.3079574100469631, "learning_rate": 1.768568287644427e-05, "loss": 0.1806, "step": 3061 }, { "epoch": 0.24443202682206433, "grad_norm": 0.36889024323157726, "learning_rate": 1.7684028525520893e-05, "loss": 0.2334, "step": 3062 }, { "epoch": 0.24451185439450787, "grad_norm": 0.34775698249476633, "learning_rate": 1.768237366095062e-05, "loss": 0.22, "step": 3063 }, { "epoch": 0.24459168196695139, "grad_norm": 0.29996013303922775, "learning_rate": 1.768071828284407e-05, "loss": 0.1916, "step": 3064 }, { "epoch": 0.2446715095393949, "grad_norm": 0.2992101990953325, "learning_rate": 1.7679062391311896e-05, "loss": 0.2493, "step": 3065 }, { "epoch": 0.24475133711183844, "grad_norm": 0.40045921973640985, "learning_rate": 1.767740598646479e-05, "loss": 0.2318, "step": 3066 }, { "epoch": 0.24483116468428195, "grad_norm": 0.2986888351427369, "learning_rate": 1.7675749068413477e-05, "loss": 0.1985, "step": 3067 }, { "epoch": 0.24491099225672547, "grad_norm": 0.2957639144962733, "learning_rate": 1.7674091637268717e-05, "loss": 0.1843, "step": 3068 }, { "epoch": 0.24499081982916898, "grad_norm": 0.32732270193915464, "learning_rate": 1.76724336931413e-05, "loss": 0.2064, "step": 3069 }, { "epoch": 0.24507064740161252, "grad_norm": 0.4146260266418904, "learning_rate": 1.7670775236142054e-05, "loss": 0.2017, "step": 3070 }, { "epoch": 0.24515047497405604, "grad_norm": 0.3376411523467657, "learning_rate": 1.7669116266381837e-05, "loss": 0.2127, "step": 3071 }, { "epoch": 0.24523030254649955, "grad_norm": 0.31682993372292445, "learning_rate": 1.7667456783971554e-05, "loss": 0.214, "step": 3072 }, { "epoch": 0.2453101301189431, "grad_norm": 0.29349455962389104, "learning_rate": 1.766579678902212e-05, "loss": 0.2331, "step": 3073 }, { "epoch": 0.2453899576913866, "grad_norm": 0.36281209643249657, "learning_rate": 1.7664136281644514e-05, "loss": 0.1841, "step": 3074 }, { "epoch": 0.24546978526383012, "grad_norm": 0.35210018313793745, "learning_rate": 1.7662475261949724e-05, "loss": 0.2198, "step": 3075 }, { "epoch": 0.24554961283627366, "grad_norm": 0.277200378720315, "learning_rate": 1.766081373004879e-05, "loss": 0.2138, "step": 3076 }, { "epoch": 0.24562944040871718, "grad_norm": 0.42462248085641985, "learning_rate": 1.765915168605277e-05, "loss": 0.216, "step": 3077 }, { "epoch": 0.2457092679811607, "grad_norm": 0.3902464597404692, "learning_rate": 1.7657489130072773e-05, "loss": 0.2027, "step": 3078 }, { "epoch": 0.2457890955536042, "grad_norm": 0.28465646408949824, "learning_rate": 1.7655826062219932e-05, "loss": 0.1828, "step": 3079 }, { "epoch": 0.24586892312604774, "grad_norm": 0.29953343284596073, "learning_rate": 1.7654162482605418e-05, "loss": 0.1876, "step": 3080 }, { "epoch": 0.24594875069849126, "grad_norm": 0.3167625371220823, "learning_rate": 1.765249839134043e-05, "loss": 0.1606, "step": 3081 }, { "epoch": 0.24602857827093477, "grad_norm": 0.3544654399488348, "learning_rate": 1.765083378853621e-05, "loss": 0.1998, "step": 3082 }, { "epoch": 0.24610840584337831, "grad_norm": 0.27822748008377274, "learning_rate": 1.7649168674304026e-05, "loss": 0.2021, "step": 3083 }, { "epoch": 0.24618823341582183, "grad_norm": 0.3259003311172956, "learning_rate": 1.764750304875519e-05, "loss": 0.1684, "step": 3084 }, { "epoch": 0.24626806098826534, "grad_norm": 0.4757402928651855, "learning_rate": 1.764583691200104e-05, "loss": 0.207, "step": 3085 }, { "epoch": 0.24634788856070886, "grad_norm": 0.34922505901928225, "learning_rate": 1.764417026415295e-05, "loss": 0.2303, "step": 3086 }, { "epoch": 0.2464277161331524, "grad_norm": 0.28772530320921313, "learning_rate": 1.7642503105322327e-05, "loss": 0.1949, "step": 3087 }, { "epoch": 0.2465075437055959, "grad_norm": 0.4237262426685162, "learning_rate": 1.764083543562062e-05, "loss": 0.1998, "step": 3088 }, { "epoch": 0.24658737127803942, "grad_norm": 0.3649987526415709, "learning_rate": 1.76391672551593e-05, "loss": 0.2372, "step": 3089 }, { "epoch": 0.24666719885048297, "grad_norm": 0.2854526444259618, "learning_rate": 1.7637498564049878e-05, "loss": 0.1869, "step": 3090 }, { "epoch": 0.24674702642292648, "grad_norm": 0.3093316874693304, "learning_rate": 1.7635829362403904e-05, "loss": 0.1886, "step": 3091 }, { "epoch": 0.24682685399537, "grad_norm": 0.419767780610806, "learning_rate": 1.7634159650332953e-05, "loss": 0.1539, "step": 3092 }, { "epoch": 0.24690668156781354, "grad_norm": 0.32781737928028803, "learning_rate": 1.7632489427948646e-05, "loss": 0.1909, "step": 3093 }, { "epoch": 0.24698650914025705, "grad_norm": 0.29051260202612306, "learning_rate": 1.763081869536262e-05, "loss": 0.2193, "step": 3094 }, { "epoch": 0.24706633671270056, "grad_norm": 0.28435903861721135, "learning_rate": 1.7629147452686566e-05, "loss": 0.2007, "step": 3095 }, { "epoch": 0.24714616428514408, "grad_norm": 0.3398574010729059, "learning_rate": 1.7627475700032197e-05, "loss": 0.2385, "step": 3096 }, { "epoch": 0.24722599185758762, "grad_norm": 0.2818183943574803, "learning_rate": 1.7625803437511262e-05, "loss": 0.1838, "step": 3097 }, { "epoch": 0.24730581943003113, "grad_norm": 0.3178427915214008, "learning_rate": 1.7624130665235545e-05, "loss": 0.1883, "step": 3098 }, { "epoch": 0.24738564700247465, "grad_norm": 0.3180527290505673, "learning_rate": 1.7622457383316864e-05, "loss": 0.1553, "step": 3099 }, { "epoch": 0.2474654745749182, "grad_norm": 0.3384223439197961, "learning_rate": 1.7620783591867073e-05, "loss": 0.175, "step": 3100 }, { "epoch": 0.2475453021473617, "grad_norm": 0.35452056052948194, "learning_rate": 1.761910929099806e-05, "loss": 0.1809, "step": 3101 }, { "epoch": 0.24762512971980521, "grad_norm": 0.3066823326807387, "learning_rate": 1.761743448082174e-05, "loss": 0.2242, "step": 3102 }, { "epoch": 0.24770495729224876, "grad_norm": 0.3048200274319912, "learning_rate": 1.7615759161450073e-05, "loss": 0.1898, "step": 3103 }, { "epoch": 0.24778478486469227, "grad_norm": 0.3614194099686825, "learning_rate": 1.761408333299504e-05, "loss": 0.1879, "step": 3104 }, { "epoch": 0.24786461243713578, "grad_norm": 0.38192114760956936, "learning_rate": 1.7612406995568673e-05, "loss": 0.2136, "step": 3105 }, { "epoch": 0.2479444400095793, "grad_norm": 0.27927671993123565, "learning_rate": 1.761073014928302e-05, "loss": 0.2086, "step": 3106 }, { "epoch": 0.24802426758202284, "grad_norm": 0.326746926845962, "learning_rate": 1.760905279425018e-05, "loss": 0.1648, "step": 3107 }, { "epoch": 0.24810409515446635, "grad_norm": 0.36619270109320745, "learning_rate": 1.760737493058227e-05, "loss": 0.1891, "step": 3108 }, { "epoch": 0.24818392272690987, "grad_norm": 0.36526814383267664, "learning_rate": 1.7605696558391453e-05, "loss": 0.1968, "step": 3109 }, { "epoch": 0.2482637502993534, "grad_norm": 0.2654867931771537, "learning_rate": 1.7604017677789916e-05, "loss": 0.1582, "step": 3110 }, { "epoch": 0.24834357787179692, "grad_norm": 0.2721607415398667, "learning_rate": 1.7602338288889894e-05, "loss": 0.1782, "step": 3111 }, { "epoch": 0.24842340544424044, "grad_norm": 0.30701205691100664, "learning_rate": 1.760065839180364e-05, "loss": 0.1606, "step": 3112 }, { "epoch": 0.24850323301668395, "grad_norm": 0.2628643331322752, "learning_rate": 1.7598977986643454e-05, "loss": 0.2064, "step": 3113 }, { "epoch": 0.2485830605891275, "grad_norm": 0.34185669158921683, "learning_rate": 1.759729707352166e-05, "loss": 0.2132, "step": 3114 }, { "epoch": 0.248662888161571, "grad_norm": 0.33702564068022334, "learning_rate": 1.7595615652550627e-05, "loss": 0.206, "step": 3115 }, { "epoch": 0.24874271573401452, "grad_norm": 0.3118136633178514, "learning_rate": 1.759393372384274e-05, "loss": 0.138, "step": 3116 }, { "epoch": 0.24882254330645806, "grad_norm": 0.26564669002591657, "learning_rate": 1.759225128751044e-05, "loss": 0.2059, "step": 3117 }, { "epoch": 0.24890237087890157, "grad_norm": 0.3053966265677154, "learning_rate": 1.7590568343666186e-05, "loss": 0.1741, "step": 3118 }, { "epoch": 0.2489821984513451, "grad_norm": 0.3436500654337431, "learning_rate": 1.7588884892422478e-05, "loss": 0.1806, "step": 3119 }, { "epoch": 0.24906202602378863, "grad_norm": 0.3350746123305785, "learning_rate": 1.758720093389185e-05, "loss": 0.2109, "step": 3120 }, { "epoch": 0.24914185359623214, "grad_norm": 0.3276498774215914, "learning_rate": 1.758551646818686e-05, "loss": 0.2335, "step": 3121 }, { "epoch": 0.24922168116867566, "grad_norm": 0.29493820345035565, "learning_rate": 1.7583831495420115e-05, "loss": 0.2162, "step": 3122 }, { "epoch": 0.24930150874111917, "grad_norm": 0.3709584057997565, "learning_rate": 1.7582146015704246e-05, "loss": 0.1615, "step": 3123 }, { "epoch": 0.2493813363135627, "grad_norm": 0.36982982741015963, "learning_rate": 1.7580460029151926e-05, "loss": 0.1822, "step": 3124 }, { "epoch": 0.24946116388600623, "grad_norm": 0.29961239012643537, "learning_rate": 1.757877353587585e-05, "loss": 0.1934, "step": 3125 }, { "epoch": 0.24954099145844974, "grad_norm": 0.30704500666193524, "learning_rate": 1.757708653598875e-05, "loss": 0.2767, "step": 3126 }, { "epoch": 0.24962081903089328, "grad_norm": 0.3318130355921206, "learning_rate": 1.7575399029603407e-05, "loss": 0.1526, "step": 3127 }, { "epoch": 0.2497006466033368, "grad_norm": 0.33202479185682804, "learning_rate": 1.7573711016832616e-05, "loss": 0.1885, "step": 3128 }, { "epoch": 0.2497804741757803, "grad_norm": 0.29892387737295256, "learning_rate": 1.7572022497789216e-05, "loss": 0.2496, "step": 3129 }, { "epoch": 0.24986030174822382, "grad_norm": 0.32925151161584504, "learning_rate": 1.757033347258608e-05, "loss": 0.2054, "step": 3130 }, { "epoch": 0.24994012932066736, "grad_norm": 0.299600898377559, "learning_rate": 1.7568643941336108e-05, "loss": 0.1782, "step": 3131 }, { "epoch": 0.2500199568931109, "grad_norm": 0.32483660213862475, "learning_rate": 1.756695390415224e-05, "loss": 0.2002, "step": 3132 }, { "epoch": 0.2500997844655544, "grad_norm": 0.3005915696765966, "learning_rate": 1.756526336114745e-05, "loss": 0.1958, "step": 3133 }, { "epoch": 0.25017961203799793, "grad_norm": 0.3931032812693296, "learning_rate": 1.7563572312434746e-05, "loss": 0.1856, "step": 3134 }, { "epoch": 0.25025943961044145, "grad_norm": 0.41568942147824534, "learning_rate": 1.756188075812716e-05, "loss": 0.2617, "step": 3135 }, { "epoch": 0.25033926718288496, "grad_norm": 0.3317341832474697, "learning_rate": 1.7560188698337776e-05, "loss": 0.2339, "step": 3136 }, { "epoch": 0.2504190947553285, "grad_norm": 0.29704852187008085, "learning_rate": 1.7558496133179695e-05, "loss": 0.2861, "step": 3137 }, { "epoch": 0.250498922327772, "grad_norm": 0.4776051156331242, "learning_rate": 1.7556803062766055e-05, "loss": 0.173, "step": 3138 }, { "epoch": 0.25057874990021556, "grad_norm": 0.30668377113238054, "learning_rate": 1.7555109487210043e-05, "loss": 0.2092, "step": 3139 }, { "epoch": 0.25065857747265907, "grad_norm": 0.27197679261381463, "learning_rate": 1.7553415406624856e-05, "loss": 0.1715, "step": 3140 }, { "epoch": 0.2507384050451026, "grad_norm": 0.3599937949957457, "learning_rate": 1.7551720821123742e-05, "loss": 0.1691, "step": 3141 }, { "epoch": 0.2508182326175461, "grad_norm": 0.39312206413150497, "learning_rate": 1.7550025730819978e-05, "loss": 0.1782, "step": 3142 }, { "epoch": 0.2508980601899896, "grad_norm": 0.39185907746902215, "learning_rate": 1.7548330135826868e-05, "loss": 0.1949, "step": 3143 }, { "epoch": 0.2509778877624331, "grad_norm": 0.34996519425349454, "learning_rate": 1.7546634036257762e-05, "loss": 0.1856, "step": 3144 }, { "epoch": 0.25105771533487664, "grad_norm": 0.28871936700175294, "learning_rate": 1.754493743222604e-05, "loss": 0.2265, "step": 3145 }, { "epoch": 0.2511375429073202, "grad_norm": 0.35547486875867457, "learning_rate": 1.754324032384511e-05, "loss": 0.2003, "step": 3146 }, { "epoch": 0.2512173704797637, "grad_norm": 0.33182789272269975, "learning_rate": 1.754154271122841e-05, "loss": 0.189, "step": 3147 }, { "epoch": 0.25129719805220724, "grad_norm": 0.36622408191999756, "learning_rate": 1.7539844594489425e-05, "loss": 0.2272, "step": 3148 }, { "epoch": 0.25137702562465075, "grad_norm": 0.3367478745119418, "learning_rate": 1.753814597374167e-05, "loss": 0.2094, "step": 3149 }, { "epoch": 0.25145685319709427, "grad_norm": 0.34877268945665474, "learning_rate": 1.753644684909869e-05, "loss": 0.1977, "step": 3150 }, { "epoch": 0.2515366807695378, "grad_norm": 0.3840492919186214, "learning_rate": 1.7534747220674057e-05, "loss": 0.2488, "step": 3151 }, { "epoch": 0.2516165083419813, "grad_norm": 0.43615117851135793, "learning_rate": 1.7533047088581396e-05, "loss": 0.2075, "step": 3152 }, { "epoch": 0.25169633591442486, "grad_norm": 0.2636424951357789, "learning_rate": 1.7531346452934347e-05, "loss": 0.2303, "step": 3153 }, { "epoch": 0.2517761634868684, "grad_norm": 0.2726451249577152, "learning_rate": 1.7529645313846594e-05, "loss": 0.1872, "step": 3154 }, { "epoch": 0.2518559910593119, "grad_norm": 0.37041053639823346, "learning_rate": 1.7527943671431846e-05, "loss": 0.2114, "step": 3155 }, { "epoch": 0.2519358186317554, "grad_norm": 0.40943132802172, "learning_rate": 1.7526241525803858e-05, "loss": 0.2649, "step": 3156 }, { "epoch": 0.2520156462041989, "grad_norm": 0.3812555223998707, "learning_rate": 1.7524538877076408e-05, "loss": 0.2043, "step": 3157 }, { "epoch": 0.25209547377664243, "grad_norm": 0.4062041333387874, "learning_rate": 1.7522835725363312e-05, "loss": 0.1819, "step": 3158 }, { "epoch": 0.252175301349086, "grad_norm": 0.4014452290675488, "learning_rate": 1.7521132070778417e-05, "loss": 0.1845, "step": 3159 }, { "epoch": 0.2522551289215295, "grad_norm": 0.34924707681616823, "learning_rate": 1.7519427913435608e-05, "loss": 0.218, "step": 3160 }, { "epoch": 0.25233495649397303, "grad_norm": 0.29875304162088845, "learning_rate": 1.75177232534488e-05, "loss": 0.2408, "step": 3161 }, { "epoch": 0.25241478406641654, "grad_norm": 0.38525479989280326, "learning_rate": 1.7516018090931947e-05, "loss": 0.2538, "step": 3162 }, { "epoch": 0.25249461163886006, "grad_norm": 0.3241718145447271, "learning_rate": 1.7514312425999024e-05, "loss": 0.2133, "step": 3163 }, { "epoch": 0.25257443921130357, "grad_norm": 0.32654518989558445, "learning_rate": 1.7512606258764055e-05, "loss": 0.2063, "step": 3164 }, { "epoch": 0.2526542667837471, "grad_norm": 0.26571904648949635, "learning_rate": 1.751089958934109e-05, "loss": 0.2434, "step": 3165 }, { "epoch": 0.25273409435619065, "grad_norm": 0.31763687092239906, "learning_rate": 1.750919241784421e-05, "loss": 0.2232, "step": 3166 }, { "epoch": 0.25281392192863417, "grad_norm": 0.3620273356195308, "learning_rate": 1.750748474438753e-05, "loss": 0.1834, "step": 3167 }, { "epoch": 0.2528937495010777, "grad_norm": 0.2995162312104989, "learning_rate": 1.750577656908521e-05, "loss": 0.2202, "step": 3168 }, { "epoch": 0.2529735770735212, "grad_norm": 0.37224361337786394, "learning_rate": 1.7504067892051427e-05, "loss": 0.2009, "step": 3169 }, { "epoch": 0.2530534046459647, "grad_norm": 0.33015831028351983, "learning_rate": 1.7502358713400403e-05, "loss": 0.1618, "step": 3170 }, { "epoch": 0.2531332322184082, "grad_norm": 0.3808595050686076, "learning_rate": 1.750064903324639e-05, "loss": 0.2346, "step": 3171 }, { "epoch": 0.25321305979085174, "grad_norm": 0.3275357187974882, "learning_rate": 1.749893885170367e-05, "loss": 0.2227, "step": 3172 }, { "epoch": 0.2532928873632953, "grad_norm": 0.28964358709134436, "learning_rate": 1.7497228168886568e-05, "loss": 0.2246, "step": 3173 }, { "epoch": 0.2533727149357388, "grad_norm": 0.27448151883790095, "learning_rate": 1.7495516984909426e-05, "loss": 0.1774, "step": 3174 }, { "epoch": 0.25345254250818233, "grad_norm": 0.31953551033937805, "learning_rate": 1.749380529988664e-05, "loss": 0.1676, "step": 3175 }, { "epoch": 0.25353237008062585, "grad_norm": 0.29945784463522196, "learning_rate": 1.749209311393263e-05, "loss": 0.202, "step": 3176 }, { "epoch": 0.25361219765306936, "grad_norm": 0.3404595658209405, "learning_rate": 1.7490380427161842e-05, "loss": 0.2251, "step": 3177 }, { "epoch": 0.2536920252255129, "grad_norm": 0.27270535635734466, "learning_rate": 1.7488667239688763e-05, "loss": 0.242, "step": 3178 }, { "epoch": 0.2537718527979564, "grad_norm": 0.32909762911680857, "learning_rate": 1.7486953551627915e-05, "loss": 0.1698, "step": 3179 }, { "epoch": 0.25385168037039996, "grad_norm": 0.32544002663918853, "learning_rate": 1.7485239363093853e-05, "loss": 0.1889, "step": 3180 }, { "epoch": 0.25393150794284347, "grad_norm": 0.337444359332905, "learning_rate": 1.7483524674201162e-05, "loss": 0.1856, "step": 3181 }, { "epoch": 0.254011335515287, "grad_norm": 0.35584054050623853, "learning_rate": 1.748180948506446e-05, "loss": 0.2356, "step": 3182 }, { "epoch": 0.2540911630877305, "grad_norm": 0.293063057575232, "learning_rate": 1.7480093795798404e-05, "loss": 0.2141, "step": 3183 }, { "epoch": 0.254170990660174, "grad_norm": 0.2934899095546368, "learning_rate": 1.747837760651768e-05, "loss": 0.1798, "step": 3184 }, { "epoch": 0.2542508182326175, "grad_norm": 0.3494857905323595, "learning_rate": 1.7476660917337006e-05, "loss": 0.1903, "step": 3185 }, { "epoch": 0.2543306458050611, "grad_norm": 0.29178312232957376, "learning_rate": 1.7474943728371138e-05, "loss": 0.1449, "step": 3186 }, { "epoch": 0.2544104733775046, "grad_norm": 0.3154767793387025, "learning_rate": 1.7473226039734866e-05, "loss": 0.199, "step": 3187 }, { "epoch": 0.2544903009499481, "grad_norm": 0.34686422701941827, "learning_rate": 1.7471507851543008e-05, "loss": 0.1683, "step": 3188 }, { "epoch": 0.25457012852239164, "grad_norm": 0.30402276766281056, "learning_rate": 1.7469789163910416e-05, "loss": 0.1497, "step": 3189 }, { "epoch": 0.25464995609483515, "grad_norm": 0.31399526477056844, "learning_rate": 1.746806997695198e-05, "loss": 0.2022, "step": 3190 }, { "epoch": 0.25472978366727866, "grad_norm": 0.2933876752498307, "learning_rate": 1.7466350290782622e-05, "loss": 0.2028, "step": 3191 }, { "epoch": 0.2548096112397222, "grad_norm": 0.3013978411866822, "learning_rate": 1.7464630105517294e-05, "loss": 0.1758, "step": 3192 }, { "epoch": 0.25488943881216575, "grad_norm": 0.331489048303586, "learning_rate": 1.7462909421270982e-05, "loss": 0.215, "step": 3193 }, { "epoch": 0.25496926638460926, "grad_norm": 0.3556058341817901, "learning_rate": 1.7461188238158714e-05, "loss": 0.2049, "step": 3194 }, { "epoch": 0.2550490939570528, "grad_norm": 0.2807403223096629, "learning_rate": 1.7459466556295534e-05, "loss": 0.1902, "step": 3195 }, { "epoch": 0.2551289215294963, "grad_norm": 0.31802148817790327, "learning_rate": 1.7457744375796536e-05, "loss": 0.2061, "step": 3196 }, { "epoch": 0.2552087491019398, "grad_norm": 0.35379914421237896, "learning_rate": 1.745602169677684e-05, "loss": 0.1858, "step": 3197 }, { "epoch": 0.2552885766743833, "grad_norm": 0.34370930246556514, "learning_rate": 1.7454298519351602e-05, "loss": 0.204, "step": 3198 }, { "epoch": 0.25536840424682683, "grad_norm": 0.2959021920601785, "learning_rate": 1.7452574843636005e-05, "loss": 0.1603, "step": 3199 }, { "epoch": 0.2554482318192704, "grad_norm": 0.296778162845943, "learning_rate": 1.7450850669745274e-05, "loss": 0.1733, "step": 3200 }, { "epoch": 0.2555280593917139, "grad_norm": 0.41263215559864497, "learning_rate": 1.744912599779466e-05, "loss": 0.2478, "step": 3201 }, { "epoch": 0.2556078869641574, "grad_norm": 0.30263079213669886, "learning_rate": 1.7447400827899457e-05, "loss": 0.1731, "step": 3202 }, { "epoch": 0.25568771453660094, "grad_norm": 0.3261736049862554, "learning_rate": 1.744567516017498e-05, "loss": 0.2317, "step": 3203 }, { "epoch": 0.25576754210904445, "grad_norm": 0.34482162690714874, "learning_rate": 1.7443948994736583e-05, "loss": 0.1751, "step": 3204 }, { "epoch": 0.25584736968148797, "grad_norm": 0.33617485513093825, "learning_rate": 1.7442222331699655e-05, "loss": 0.192, "step": 3205 }, { "epoch": 0.2559271972539315, "grad_norm": 0.3276592616065403, "learning_rate": 1.7440495171179616e-05, "loss": 0.1861, "step": 3206 }, { "epoch": 0.25600702482637505, "grad_norm": 0.3154963087112529, "learning_rate": 1.743876751329192e-05, "loss": 0.2067, "step": 3207 }, { "epoch": 0.25608685239881857, "grad_norm": 0.3146317643260589, "learning_rate": 1.743703935815205e-05, "loss": 0.2264, "step": 3208 }, { "epoch": 0.2561666799712621, "grad_norm": 0.36831754731360244, "learning_rate": 1.7435310705875536e-05, "loss": 0.1884, "step": 3209 }, { "epoch": 0.2562465075437056, "grad_norm": 0.3301126813112496, "learning_rate": 1.7433581556577923e-05, "loss": 0.2198, "step": 3210 }, { "epoch": 0.2563263351161491, "grad_norm": 0.29484253695035806, "learning_rate": 1.74318519103748e-05, "loss": 0.2085, "step": 3211 }, { "epoch": 0.2564061626885926, "grad_norm": 0.3148261745120703, "learning_rate": 1.7430121767381793e-05, "loss": 0.1667, "step": 3212 }, { "epoch": 0.25648599026103613, "grad_norm": 0.3291053142399405, "learning_rate": 1.7428391127714546e-05, "loss": 0.2534, "step": 3213 }, { "epoch": 0.2565658178334797, "grad_norm": 0.3087567071075757, "learning_rate": 1.742665999148875e-05, "loss": 0.2036, "step": 3214 }, { "epoch": 0.2566456454059232, "grad_norm": 0.341986477034975, "learning_rate": 1.7424928358820127e-05, "loss": 0.243, "step": 3215 }, { "epoch": 0.25672547297836673, "grad_norm": 0.31595535873075176, "learning_rate": 1.7423196229824418e-05, "loss": 0.1852, "step": 3216 }, { "epoch": 0.25680530055081024, "grad_norm": 0.2907328099977936, "learning_rate": 1.7421463604617428e-05, "loss": 0.1951, "step": 3217 }, { "epoch": 0.25688512812325376, "grad_norm": 0.32455897774761844, "learning_rate": 1.741973048331496e-05, "loss": 0.199, "step": 3218 }, { "epoch": 0.2569649556956973, "grad_norm": 0.318311430652235, "learning_rate": 1.7417996866032873e-05, "loss": 0.151, "step": 3219 }, { "epoch": 0.25704478326814084, "grad_norm": 0.2933249470269222, "learning_rate": 1.7416262752887052e-05, "loss": 0.1952, "step": 3220 }, { "epoch": 0.25712461084058436, "grad_norm": 0.34774087415103994, "learning_rate": 1.7414528143993416e-05, "loss": 0.1807, "step": 3221 }, { "epoch": 0.25720443841302787, "grad_norm": 0.267871488771239, "learning_rate": 1.7412793039467915e-05, "loss": 0.2488, "step": 3222 }, { "epoch": 0.2572842659854714, "grad_norm": 0.2691669838978398, "learning_rate": 1.7411057439426536e-05, "loss": 0.2053, "step": 3223 }, { "epoch": 0.2573640935579149, "grad_norm": 0.3202605417603142, "learning_rate": 1.7409321343985298e-05, "loss": 0.1805, "step": 3224 }, { "epoch": 0.2574439211303584, "grad_norm": 0.304855975804567, "learning_rate": 1.7407584753260246e-05, "loss": 0.1979, "step": 3225 }, { "epoch": 0.2575237487028019, "grad_norm": 0.31852385018099166, "learning_rate": 1.7405847667367468e-05, "loss": 0.2406, "step": 3226 }, { "epoch": 0.2576035762752455, "grad_norm": 0.2753558572737586, "learning_rate": 1.7404110086423082e-05, "loss": 0.1975, "step": 3227 }, { "epoch": 0.257683403847689, "grad_norm": 0.3134146182754128, "learning_rate": 1.740237201054324e-05, "loss": 0.1956, "step": 3228 }, { "epoch": 0.2577632314201325, "grad_norm": 0.36917613744042294, "learning_rate": 1.7400633439844122e-05, "loss": 0.2232, "step": 3229 }, { "epoch": 0.25784305899257604, "grad_norm": 0.33718711687866026, "learning_rate": 1.7398894374441947e-05, "loss": 0.2449, "step": 3230 }, { "epoch": 0.25792288656501955, "grad_norm": 0.3993781960185278, "learning_rate": 1.7397154814452964e-05, "loss": 0.1407, "step": 3231 }, { "epoch": 0.25800271413746306, "grad_norm": 0.35412280542352426, "learning_rate": 1.7395414759993456e-05, "loss": 0.1799, "step": 3232 }, { "epoch": 0.2580825417099066, "grad_norm": 0.3792914864879229, "learning_rate": 1.739367421117973e-05, "loss": 0.1706, "step": 3233 }, { "epoch": 0.25816236928235015, "grad_norm": 0.3879212948150934, "learning_rate": 1.739193316812815e-05, "loss": 0.2262, "step": 3234 }, { "epoch": 0.25824219685479366, "grad_norm": 0.31291967728757625, "learning_rate": 1.739019163095509e-05, "loss": 0.213, "step": 3235 }, { "epoch": 0.2583220244272372, "grad_norm": 0.28695305072402816, "learning_rate": 1.7388449599776968e-05, "loss": 0.1662, "step": 3236 }, { "epoch": 0.2584018519996807, "grad_norm": 0.3009748399655212, "learning_rate": 1.7386707074710227e-05, "loss": 0.2121, "step": 3237 }, { "epoch": 0.2584816795721242, "grad_norm": 0.2782090498688065, "learning_rate": 1.738496405587135e-05, "loss": 0.212, "step": 3238 }, { "epoch": 0.2585615071445677, "grad_norm": 0.29978309002552017, "learning_rate": 1.7383220543376855e-05, "loss": 0.2352, "step": 3239 }, { "epoch": 0.25864133471701123, "grad_norm": 0.2899246265871769, "learning_rate": 1.738147653734328e-05, "loss": 0.2238, "step": 3240 }, { "epoch": 0.2587211622894548, "grad_norm": 0.3423050156920425, "learning_rate": 1.7379732037887208e-05, "loss": 0.2151, "step": 3241 }, { "epoch": 0.2588009898618983, "grad_norm": 0.3097337769281277, "learning_rate": 1.737798704512526e-05, "loss": 0.18, "step": 3242 }, { "epoch": 0.2588808174343418, "grad_norm": 0.31475511417414803, "learning_rate": 1.7376241559174076e-05, "loss": 0.1277, "step": 3243 }, { "epoch": 0.25896064500678534, "grad_norm": 0.31728858827678535, "learning_rate": 1.737449558015033e-05, "loss": 0.1652, "step": 3244 }, { "epoch": 0.25904047257922885, "grad_norm": 0.3505362875051781, "learning_rate": 1.737274910817074e-05, "loss": 0.2003, "step": 3245 }, { "epoch": 0.25912030015167237, "grad_norm": 0.3971526728813899, "learning_rate": 1.737100214335205e-05, "loss": 0.2573, "step": 3246 }, { "epoch": 0.25920012772411594, "grad_norm": 0.2943095842302147, "learning_rate": 1.7369254685811038e-05, "loss": 0.1773, "step": 3247 }, { "epoch": 0.25927995529655945, "grad_norm": 0.3518930434575484, "learning_rate": 1.7367506735664512e-05, "loss": 0.2004, "step": 3248 }, { "epoch": 0.25935978286900296, "grad_norm": 0.32746752944638563, "learning_rate": 1.7365758293029318e-05, "loss": 0.1972, "step": 3249 }, { "epoch": 0.2594396104414465, "grad_norm": 0.3896453107579995, "learning_rate": 1.736400935802233e-05, "loss": 0.1607, "step": 3250 }, { "epoch": 0.25951943801389, "grad_norm": 0.3359921088558479, "learning_rate": 1.7362259930760463e-05, "loss": 0.1834, "step": 3251 }, { "epoch": 0.2595992655863335, "grad_norm": 0.46470563547010885, "learning_rate": 1.7360510011360648e-05, "loss": 0.1685, "step": 3252 }, { "epoch": 0.259679093158777, "grad_norm": 0.31930869203412954, "learning_rate": 1.7358759599939872e-05, "loss": 0.1598, "step": 3253 }, { "epoch": 0.2597589207312206, "grad_norm": 0.2622343488713454, "learning_rate": 1.7357008696615138e-05, "loss": 0.2088, "step": 3254 }, { "epoch": 0.2598387483036641, "grad_norm": 0.34122376792221637, "learning_rate": 1.7355257301503487e-05, "loss": 0.1847, "step": 3255 }, { "epoch": 0.2599185758761076, "grad_norm": 0.33986188598939787, "learning_rate": 1.7353505414721994e-05, "loss": 0.1801, "step": 3256 }, { "epoch": 0.25999840344855113, "grad_norm": 0.3323979773134481, "learning_rate": 1.7351753036387763e-05, "loss": 0.2044, "step": 3257 }, { "epoch": 0.26007823102099464, "grad_norm": 0.41051265861824293, "learning_rate": 1.735000016661794e-05, "loss": 0.1957, "step": 3258 }, { "epoch": 0.26015805859343816, "grad_norm": 0.35090766932524303, "learning_rate": 1.7348246805529688e-05, "loss": 0.2108, "step": 3259 }, { "epoch": 0.26023788616588167, "grad_norm": 0.32922240347805737, "learning_rate": 1.734649295324022e-05, "loss": 0.2022, "step": 3260 }, { "epoch": 0.26031771373832524, "grad_norm": 0.3992931977995018, "learning_rate": 1.7344738609866772e-05, "loss": 0.1618, "step": 3261 }, { "epoch": 0.26039754131076875, "grad_norm": 0.309960304035828, "learning_rate": 1.7342983775526612e-05, "loss": 0.1851, "step": 3262 }, { "epoch": 0.26047736888321227, "grad_norm": 0.4375085866865086, "learning_rate": 1.7341228450337046e-05, "loss": 0.1479, "step": 3263 }, { "epoch": 0.2605571964556558, "grad_norm": 0.3568198538890455, "learning_rate": 1.733947263441541e-05, "loss": 0.1528, "step": 3264 }, { "epoch": 0.2606370240280993, "grad_norm": 0.306562926632739, "learning_rate": 1.7337716327879073e-05, "loss": 0.2533, "step": 3265 }, { "epoch": 0.2607168516005428, "grad_norm": 0.37590388043152245, "learning_rate": 1.733595953084544e-05, "loss": 0.1659, "step": 3266 }, { "epoch": 0.2607966791729863, "grad_norm": 0.35429745245959665, "learning_rate": 1.7334202243431946e-05, "loss": 0.1938, "step": 3267 }, { "epoch": 0.2608765067454299, "grad_norm": 0.2840054436130403, "learning_rate": 1.7332444465756052e-05, "loss": 0.1987, "step": 3268 }, { "epoch": 0.2609563343178734, "grad_norm": 0.3152627604269492, "learning_rate": 1.7330686197935267e-05, "loss": 0.1976, "step": 3269 }, { "epoch": 0.2610361618903169, "grad_norm": 0.34514161648833924, "learning_rate": 1.7328927440087118e-05, "loss": 0.1968, "step": 3270 }, { "epoch": 0.26111598946276043, "grad_norm": 0.356069889918866, "learning_rate": 1.7327168192329173e-05, "loss": 0.1676, "step": 3271 }, { "epoch": 0.26119581703520395, "grad_norm": 0.28427017089987194, "learning_rate": 1.732540845477903e-05, "loss": 0.2202, "step": 3272 }, { "epoch": 0.26127564460764746, "grad_norm": 0.3509159779622568, "learning_rate": 1.7323648227554326e-05, "loss": 0.2083, "step": 3273 }, { "epoch": 0.26135547218009103, "grad_norm": 0.31894379578977355, "learning_rate": 1.7321887510772718e-05, "loss": 0.1679, "step": 3274 }, { "epoch": 0.26143529975253454, "grad_norm": 0.3396178755495063, "learning_rate": 1.7320126304551904e-05, "loss": 0.198, "step": 3275 }, { "epoch": 0.26151512732497806, "grad_norm": 0.3548553105313705, "learning_rate": 1.731836460900962e-05, "loss": 0.2035, "step": 3276 }, { "epoch": 0.26159495489742157, "grad_norm": 0.3016333647923337, "learning_rate": 1.731660242426362e-05, "loss": 0.2001, "step": 3277 }, { "epoch": 0.2616747824698651, "grad_norm": 0.315564843554076, "learning_rate": 1.73148397504317e-05, "loss": 0.2133, "step": 3278 }, { "epoch": 0.2617546100423086, "grad_norm": 0.31062987803717323, "learning_rate": 1.7313076587631695e-05, "loss": 0.193, "step": 3279 }, { "epoch": 0.2618344376147521, "grad_norm": 0.314032765704083, "learning_rate": 1.7311312935981464e-05, "loss": 0.2149, "step": 3280 }, { "epoch": 0.2619142651871957, "grad_norm": 0.3650028551502364, "learning_rate": 1.7309548795598893e-05, "loss": 0.1572, "step": 3281 }, { "epoch": 0.2619940927596392, "grad_norm": 0.26619053037201723, "learning_rate": 1.730778416660191e-05, "loss": 0.2583, "step": 3282 }, { "epoch": 0.2620739203320827, "grad_norm": 0.3048047692462293, "learning_rate": 1.730601904910848e-05, "loss": 0.1771, "step": 3283 }, { "epoch": 0.2621537479045262, "grad_norm": 0.33854767616024556, "learning_rate": 1.7304253443236588e-05, "loss": 0.1962, "step": 3284 }, { "epoch": 0.26223357547696974, "grad_norm": 0.34325175003349484, "learning_rate": 1.7302487349104257e-05, "loss": 0.1895, "step": 3285 }, { "epoch": 0.26231340304941325, "grad_norm": 0.3037297694492904, "learning_rate": 1.7300720766829545e-05, "loss": 0.2092, "step": 3286 }, { "epoch": 0.26239323062185677, "grad_norm": 0.35816797341625434, "learning_rate": 1.7298953696530547e-05, "loss": 0.2207, "step": 3287 }, { "epoch": 0.26247305819430033, "grad_norm": 0.27161791919609823, "learning_rate": 1.7297186138325375e-05, "loss": 0.2106, "step": 3288 }, { "epoch": 0.26255288576674385, "grad_norm": 0.33822673695258354, "learning_rate": 1.729541809233219e-05, "loss": 0.2026, "step": 3289 }, { "epoch": 0.26263271333918736, "grad_norm": 0.3321500857483134, "learning_rate": 1.7293649558669177e-05, "loss": 0.1988, "step": 3290 }, { "epoch": 0.2627125409116309, "grad_norm": 0.32708091177075405, "learning_rate": 1.7291880537454556e-05, "loss": 0.1922, "step": 3291 }, { "epoch": 0.2627923684840744, "grad_norm": 0.3789164981103287, "learning_rate": 1.7290111028806573e-05, "loss": 0.1984, "step": 3292 }, { "epoch": 0.2628721960565179, "grad_norm": 0.311910579149501, "learning_rate": 1.7288341032843524e-05, "loss": 0.1858, "step": 3293 }, { "epoch": 0.2629520236289614, "grad_norm": 0.3289511235970833, "learning_rate": 1.7286570549683714e-05, "loss": 0.192, "step": 3294 }, { "epoch": 0.263031851201405, "grad_norm": 0.3011067091718522, "learning_rate": 1.7284799579445503e-05, "loss": 0.1559, "step": 3295 }, { "epoch": 0.2631116787738485, "grad_norm": 0.2724927661151443, "learning_rate": 1.7283028122247268e-05, "loss": 0.1718, "step": 3296 }, { "epoch": 0.263191506346292, "grad_norm": 0.3017553128406506, "learning_rate": 1.7281256178207427e-05, "loss": 0.1597, "step": 3297 }, { "epoch": 0.26327133391873553, "grad_norm": 0.30647672632364775, "learning_rate": 1.727948374744442e-05, "loss": 0.1823, "step": 3298 }, { "epoch": 0.26335116149117904, "grad_norm": 0.29232375556393175, "learning_rate": 1.7277710830076736e-05, "loss": 0.1945, "step": 3299 }, { "epoch": 0.26343098906362256, "grad_norm": 0.32662146246527385, "learning_rate": 1.7275937426222882e-05, "loss": 0.1893, "step": 3300 }, { "epoch": 0.26351081663606607, "grad_norm": 0.33501177571126484, "learning_rate": 1.727416353600141e-05, "loss": 0.1857, "step": 3301 }, { "epoch": 0.26359064420850964, "grad_norm": 0.3003580021861763, "learning_rate": 1.7272389159530887e-05, "loss": 0.2473, "step": 3302 }, { "epoch": 0.26367047178095315, "grad_norm": 0.33430109710824013, "learning_rate": 1.7270614296929933e-05, "loss": 0.1991, "step": 3303 }, { "epoch": 0.26375029935339667, "grad_norm": 0.37803908892526017, "learning_rate": 1.7268838948317187e-05, "loss": 0.1846, "step": 3304 }, { "epoch": 0.2638301269258402, "grad_norm": 0.306736207665578, "learning_rate": 1.726706311381132e-05, "loss": 0.2004, "step": 3305 }, { "epoch": 0.2639099544982837, "grad_norm": 0.2648286130957521, "learning_rate": 1.7265286793531043e-05, "loss": 0.1967, "step": 3306 }, { "epoch": 0.2639897820707272, "grad_norm": 0.30453780208957565, "learning_rate": 1.7263509987595096e-05, "loss": 0.1618, "step": 3307 }, { "epoch": 0.2640696096431708, "grad_norm": 0.33027231355110187, "learning_rate": 1.726173269612225e-05, "loss": 0.1802, "step": 3308 }, { "epoch": 0.2641494372156143, "grad_norm": 0.39556362538268597, "learning_rate": 1.725995491923131e-05, "loss": 0.1611, "step": 3309 }, { "epoch": 0.2642292647880578, "grad_norm": 0.30256661556286024, "learning_rate": 1.7258176657041117e-05, "loss": 0.1886, "step": 3310 }, { "epoch": 0.2643090923605013, "grad_norm": 0.2949407422902235, "learning_rate": 1.7256397909670537e-05, "loss": 0.2054, "step": 3311 }, { "epoch": 0.26438891993294483, "grad_norm": 0.27190567805604393, "learning_rate": 1.7254618677238473e-05, "loss": 0.1671, "step": 3312 }, { "epoch": 0.26446874750538835, "grad_norm": 0.28990139991857455, "learning_rate": 1.725283895986386e-05, "loss": 0.2187, "step": 3313 }, { "epoch": 0.26454857507783186, "grad_norm": 0.3027497995231341, "learning_rate": 1.7251058757665666e-05, "loss": 0.1859, "step": 3314 }, { "epoch": 0.26462840265027543, "grad_norm": 0.320928295748811, "learning_rate": 1.724927807076289e-05, "loss": 0.1772, "step": 3315 }, { "epoch": 0.26470823022271894, "grad_norm": 0.305287735572376, "learning_rate": 1.7247496899274564e-05, "loss": 0.2136, "step": 3316 }, { "epoch": 0.26478805779516246, "grad_norm": 0.3341942058049639, "learning_rate": 1.724571524331975e-05, "loss": 0.1867, "step": 3317 }, { "epoch": 0.26486788536760597, "grad_norm": 0.330780514188049, "learning_rate": 1.7243933103017546e-05, "loss": 0.1517, "step": 3318 }, { "epoch": 0.2649477129400495, "grad_norm": 0.3313533977247803, "learning_rate": 1.7242150478487085e-05, "loss": 0.2248, "step": 3319 }, { "epoch": 0.265027540512493, "grad_norm": 0.33239073322348117, "learning_rate": 1.724036736984752e-05, "loss": 0.1972, "step": 3320 }, { "epoch": 0.2651073680849365, "grad_norm": 0.32725805671998703, "learning_rate": 1.723858377721805e-05, "loss": 0.211, "step": 3321 }, { "epoch": 0.2651871956573801, "grad_norm": 0.31055783751379296, "learning_rate": 1.7236799700717904e-05, "loss": 0.2588, "step": 3322 }, { "epoch": 0.2652670232298236, "grad_norm": 0.3332679540015526, "learning_rate": 1.7235015140466334e-05, "loss": 0.1531, "step": 3323 }, { "epoch": 0.2653468508022671, "grad_norm": 0.42258131902649043, "learning_rate": 1.7233230096582635e-05, "loss": 0.2303, "step": 3324 }, { "epoch": 0.2654266783747106, "grad_norm": 0.3197176503264511, "learning_rate": 1.723144456918613e-05, "loss": 0.2217, "step": 3325 }, { "epoch": 0.26550650594715414, "grad_norm": 0.3383998995230394, "learning_rate": 1.722965855839617e-05, "loss": 0.1997, "step": 3326 }, { "epoch": 0.26558633351959765, "grad_norm": 0.28899733341189576, "learning_rate": 1.722787206433215e-05, "loss": 0.2513, "step": 3327 }, { "epoch": 0.26566616109204116, "grad_norm": 0.321457769509821, "learning_rate": 1.7226085087113483e-05, "loss": 0.2206, "step": 3328 }, { "epoch": 0.26574598866448473, "grad_norm": 0.31235685430693144, "learning_rate": 1.722429762685963e-05, "loss": 0.2581, "step": 3329 }, { "epoch": 0.26582581623692825, "grad_norm": 0.37750928392722355, "learning_rate": 1.7222509683690065e-05, "loss": 0.1594, "step": 3330 }, { "epoch": 0.26590564380937176, "grad_norm": 0.38618003760233005, "learning_rate": 1.722072125772431e-05, "loss": 0.1838, "step": 3331 }, { "epoch": 0.2659854713818153, "grad_norm": 0.3611842058637029, "learning_rate": 1.7218932349081918e-05, "loss": 0.1565, "step": 3332 }, { "epoch": 0.2660652989542588, "grad_norm": 0.42903188866059744, "learning_rate": 1.7217142957882465e-05, "loss": 0.2097, "step": 3333 }, { "epoch": 0.2661451265267023, "grad_norm": 0.3176159028551993, "learning_rate": 1.7215353084245564e-05, "loss": 0.2056, "step": 3334 }, { "epoch": 0.26622495409914587, "grad_norm": 0.372108313771693, "learning_rate": 1.7213562728290868e-05, "loss": 0.1976, "step": 3335 }, { "epoch": 0.2663047816715894, "grad_norm": 0.3513906204097282, "learning_rate": 1.7211771890138048e-05, "loss": 0.2437, "step": 3336 }, { "epoch": 0.2663846092440329, "grad_norm": 0.37219239368484497, "learning_rate": 1.7209980569906818e-05, "loss": 0.177, "step": 3337 }, { "epoch": 0.2664644368164764, "grad_norm": 0.33575535040931875, "learning_rate": 1.7208188767716924e-05, "loss": 0.1769, "step": 3338 }, { "epoch": 0.2665442643889199, "grad_norm": 0.3181146514851531, "learning_rate": 1.720639648368813e-05, "loss": 0.2249, "step": 3339 }, { "epoch": 0.26662409196136344, "grad_norm": 0.31322961128660376, "learning_rate": 1.720460371794025e-05, "loss": 0.2622, "step": 3340 }, { "epoch": 0.26670391953380695, "grad_norm": 0.3070787803294704, "learning_rate": 1.7202810470593126e-05, "loss": 0.2013, "step": 3341 }, { "epoch": 0.2667837471062505, "grad_norm": 0.3093725178800597, "learning_rate": 1.7201016741766627e-05, "loss": 0.1778, "step": 3342 }, { "epoch": 0.26686357467869404, "grad_norm": 0.30473782809434663, "learning_rate": 1.7199222531580656e-05, "loss": 0.2075, "step": 3343 }, { "epoch": 0.26694340225113755, "grad_norm": 0.34209503705050054, "learning_rate": 1.719742784015515e-05, "loss": 0.1977, "step": 3344 }, { "epoch": 0.26702322982358107, "grad_norm": 0.33295826388133726, "learning_rate": 1.719563266761007e-05, "loss": 0.1288, "step": 3345 }, { "epoch": 0.2671030573960246, "grad_norm": 0.30041377810414466, "learning_rate": 1.719383701406543e-05, "loss": 0.165, "step": 3346 }, { "epoch": 0.2671828849684681, "grad_norm": 0.34154291529916403, "learning_rate": 1.7192040879641253e-05, "loss": 0.1792, "step": 3347 }, { "epoch": 0.2672627125409116, "grad_norm": 0.3385408932173722, "learning_rate": 1.7190244264457602e-05, "loss": 0.23, "step": 3348 }, { "epoch": 0.2673425401133552, "grad_norm": 0.30872702398841123, "learning_rate": 1.7188447168634578e-05, "loss": 0.1963, "step": 3349 }, { "epoch": 0.2674223676857987, "grad_norm": 0.2921757310275412, "learning_rate": 1.718664959229231e-05, "loss": 0.2049, "step": 3350 }, { "epoch": 0.2675021952582422, "grad_norm": 0.2728965241135345, "learning_rate": 1.7184851535550958e-05, "loss": 0.2182, "step": 3351 }, { "epoch": 0.2675820228306857, "grad_norm": 0.2918253712252462, "learning_rate": 1.7183052998530713e-05, "loss": 0.1866, "step": 3352 }, { "epoch": 0.26766185040312923, "grad_norm": 0.3229982915428424, "learning_rate": 1.71812539813518e-05, "loss": 0.1837, "step": 3353 }, { "epoch": 0.26774167797557274, "grad_norm": 0.2742707469694827, "learning_rate": 1.7179454484134476e-05, "loss": 0.2098, "step": 3354 }, { "epoch": 0.26782150554801626, "grad_norm": 0.42577283118638143, "learning_rate": 1.717765450699904e-05, "loss": 0.2138, "step": 3355 }, { "epoch": 0.26790133312045983, "grad_norm": 0.30817258538506126, "learning_rate": 1.7175854050065797e-05, "loss": 0.2011, "step": 3356 }, { "epoch": 0.26798116069290334, "grad_norm": 0.36844498320197583, "learning_rate": 1.717405311345511e-05, "loss": 0.1903, "step": 3357 }, { "epoch": 0.26806098826534686, "grad_norm": 0.3387912702896744, "learning_rate": 1.7172251697287366e-05, "loss": 0.2096, "step": 3358 }, { "epoch": 0.26814081583779037, "grad_norm": 0.33673633607361214, "learning_rate": 1.7170449801682978e-05, "loss": 0.2431, "step": 3359 }, { "epoch": 0.2682206434102339, "grad_norm": 0.411923150085662, "learning_rate": 1.71686474267624e-05, "loss": 0.1559, "step": 3360 }, { "epoch": 0.2683004709826774, "grad_norm": 0.30154188136744153, "learning_rate": 1.7166844572646107e-05, "loss": 0.2115, "step": 3361 }, { "epoch": 0.26838029855512097, "grad_norm": 0.35369169776699555, "learning_rate": 1.7165041239454616e-05, "loss": 0.2077, "step": 3362 }, { "epoch": 0.2684601261275645, "grad_norm": 0.3203080620652034, "learning_rate": 1.7163237427308477e-05, "loss": 0.1834, "step": 3363 }, { "epoch": 0.268539953700008, "grad_norm": 0.3088382613342674, "learning_rate": 1.7161433136328264e-05, "loss": 0.1588, "step": 3364 }, { "epoch": 0.2686197812724515, "grad_norm": 0.2747533453560686, "learning_rate": 1.7159628366634583e-05, "loss": 0.1792, "step": 3365 }, { "epoch": 0.268699608844895, "grad_norm": 0.3199300591626945, "learning_rate": 1.715782311834808e-05, "loss": 0.2267, "step": 3366 }, { "epoch": 0.26877943641733854, "grad_norm": 0.2788416165453442, "learning_rate": 1.715601739158943e-05, "loss": 0.1801, "step": 3367 }, { "epoch": 0.26885926398978205, "grad_norm": 0.3077542373717439, "learning_rate": 1.7154211186479334e-05, "loss": 0.1808, "step": 3368 }, { "epoch": 0.2689390915622256, "grad_norm": 0.3170623747811197, "learning_rate": 1.7152404503138536e-05, "loss": 0.2053, "step": 3369 }, { "epoch": 0.26901891913466913, "grad_norm": 0.29824719355169543, "learning_rate": 1.71505973416878e-05, "loss": 0.2125, "step": 3370 }, { "epoch": 0.26909874670711265, "grad_norm": 0.3155971291726465, "learning_rate": 1.714878970224793e-05, "loss": 0.1839, "step": 3371 }, { "epoch": 0.26917857427955616, "grad_norm": 0.335263787074152, "learning_rate": 1.714698158493976e-05, "loss": 0.2061, "step": 3372 }, { "epoch": 0.2692584018519997, "grad_norm": 0.29011300188204153, "learning_rate": 1.7145172989884152e-05, "loss": 0.2156, "step": 3373 }, { "epoch": 0.2693382294244432, "grad_norm": 0.32770147102095154, "learning_rate": 1.7143363917202012e-05, "loss": 0.1966, "step": 3374 }, { "epoch": 0.2694180569968867, "grad_norm": 0.2923312024790821, "learning_rate": 1.7141554367014256e-05, "loss": 0.2523, "step": 3375 }, { "epoch": 0.26949788456933027, "grad_norm": 0.37102255633250597, "learning_rate": 1.713974433944186e-05, "loss": 0.1816, "step": 3376 }, { "epoch": 0.2695777121417738, "grad_norm": 0.3488671192997756, "learning_rate": 1.7137933834605803e-05, "loss": 0.2009, "step": 3377 }, { "epoch": 0.2696575397142173, "grad_norm": 0.33574257765348325, "learning_rate": 1.7136122852627122e-05, "loss": 0.165, "step": 3378 }, { "epoch": 0.2697373672866608, "grad_norm": 0.3270561221492415, "learning_rate": 1.7134311393626863e-05, "loss": 0.1571, "step": 3379 }, { "epoch": 0.2698171948591043, "grad_norm": 0.2908959561473784, "learning_rate": 1.7132499457726125e-05, "loss": 0.1619, "step": 3380 }, { "epoch": 0.26989702243154784, "grad_norm": 0.3456753034936458, "learning_rate": 1.7130687045046025e-05, "loss": 0.1781, "step": 3381 }, { "epoch": 0.26997685000399135, "grad_norm": 0.3239419195229277, "learning_rate": 1.7128874155707715e-05, "loss": 0.2489, "step": 3382 }, { "epoch": 0.2700566775764349, "grad_norm": 0.28368817599878243, "learning_rate": 1.712706078983238e-05, "loss": 0.1986, "step": 3383 }, { "epoch": 0.27013650514887844, "grad_norm": 0.2975087053809315, "learning_rate": 1.7125246947541236e-05, "loss": 0.1777, "step": 3384 }, { "epoch": 0.27021633272132195, "grad_norm": 0.42830718235443926, "learning_rate": 1.7123432628955533e-05, "loss": 0.1669, "step": 3385 }, { "epoch": 0.27029616029376546, "grad_norm": 0.33479260215657874, "learning_rate": 1.712161783419654e-05, "loss": 0.1846, "step": 3386 }, { "epoch": 0.270375987866209, "grad_norm": 0.2728478916672122, "learning_rate": 1.7119802563385588e-05, "loss": 0.1897, "step": 3387 }, { "epoch": 0.2704558154386525, "grad_norm": 0.3199876046539419, "learning_rate": 1.7117986816644007e-05, "loss": 0.1932, "step": 3388 }, { "epoch": 0.270535643011096, "grad_norm": 0.371053502926605, "learning_rate": 1.7116170594093177e-05, "loss": 0.2225, "step": 3389 }, { "epoch": 0.2706154705835396, "grad_norm": 0.3183842810918703, "learning_rate": 1.7114353895854505e-05, "loss": 0.1967, "step": 3390 }, { "epoch": 0.2706952981559831, "grad_norm": 0.28211128453161455, "learning_rate": 1.7112536722049425e-05, "loss": 0.2097, "step": 3391 }, { "epoch": 0.2707751257284266, "grad_norm": 0.3489277497187111, "learning_rate": 1.711071907279942e-05, "loss": 0.21, "step": 3392 }, { "epoch": 0.2708549533008701, "grad_norm": 0.3602945090640929, "learning_rate": 1.710890094822598e-05, "loss": 0.1724, "step": 3393 }, { "epoch": 0.27093478087331363, "grad_norm": 0.3757949492616525, "learning_rate": 1.7107082348450647e-05, "loss": 0.1415, "step": 3394 }, { "epoch": 0.27101460844575714, "grad_norm": 0.3135076878047981, "learning_rate": 1.7105263273594982e-05, "loss": 0.2321, "step": 3395 }, { "epoch": 0.2710944360182007, "grad_norm": 0.32746237579217735, "learning_rate": 1.7103443723780587e-05, "loss": 0.1756, "step": 3396 }, { "epoch": 0.2711742635906442, "grad_norm": 0.32856155928335096, "learning_rate": 1.710162369912909e-05, "loss": 0.191, "step": 3397 }, { "epoch": 0.27125409116308774, "grad_norm": 0.29874542636506496, "learning_rate": 1.7099803199762153e-05, "loss": 0.1799, "step": 3398 }, { "epoch": 0.27133391873553125, "grad_norm": 0.2557670516514751, "learning_rate": 1.709798222580147e-05, "loss": 0.2075, "step": 3399 }, { "epoch": 0.27141374630797477, "grad_norm": 0.3480939751030981, "learning_rate": 1.709616077736876e-05, "loss": 0.1651, "step": 3400 }, { "epoch": 0.2714935738804183, "grad_norm": 0.3397328152651233, "learning_rate": 1.709433885458579e-05, "loss": 0.1868, "step": 3401 }, { "epoch": 0.2715734014528618, "grad_norm": 0.3426310906404084, "learning_rate": 1.7092516457574344e-05, "loss": 0.1919, "step": 3402 }, { "epoch": 0.27165322902530536, "grad_norm": 0.2714068016251163, "learning_rate": 1.709069358645624e-05, "loss": 0.1904, "step": 3403 }, { "epoch": 0.2717330565977489, "grad_norm": 0.3484503088435393, "learning_rate": 1.7088870241353327e-05, "loss": 0.2129, "step": 3404 }, { "epoch": 0.2718128841701924, "grad_norm": 0.3932194924890145, "learning_rate": 1.7087046422387494e-05, "loss": 0.1827, "step": 3405 }, { "epoch": 0.2718927117426359, "grad_norm": 0.3157086458938644, "learning_rate": 1.7085222129680653e-05, "loss": 0.1651, "step": 3406 }, { "epoch": 0.2719725393150794, "grad_norm": 0.307660580476229, "learning_rate": 1.7083397363354754e-05, "loss": 0.1933, "step": 3407 }, { "epoch": 0.27205236688752293, "grad_norm": 0.3995841678133441, "learning_rate": 1.7081572123531772e-05, "loss": 0.2591, "step": 3408 }, { "epoch": 0.27213219445996645, "grad_norm": 0.31544696451913806, "learning_rate": 1.7079746410333718e-05, "loss": 0.2086, "step": 3409 }, { "epoch": 0.27221202203241, "grad_norm": 0.2960337034184317, "learning_rate": 1.707792022388264e-05, "loss": 0.2781, "step": 3410 }, { "epoch": 0.27229184960485353, "grad_norm": 0.31839631895772397, "learning_rate": 1.7076093564300595e-05, "loss": 0.2144, "step": 3411 }, { "epoch": 0.27237167717729704, "grad_norm": 0.3160717550332414, "learning_rate": 1.7074266431709705e-05, "loss": 0.1725, "step": 3412 }, { "epoch": 0.27245150474974056, "grad_norm": 0.35222736357904966, "learning_rate": 1.7072438826232098e-05, "loss": 0.2478, "step": 3413 }, { "epoch": 0.27253133232218407, "grad_norm": 0.38078235715056524, "learning_rate": 1.7070610747989943e-05, "loss": 0.174, "step": 3414 }, { "epoch": 0.2726111598946276, "grad_norm": 0.32632698628061685, "learning_rate": 1.7068782197105443e-05, "loss": 0.2106, "step": 3415 }, { "epoch": 0.2726909874670711, "grad_norm": 0.3037829918197878, "learning_rate": 1.7066953173700825e-05, "loss": 0.132, "step": 3416 }, { "epoch": 0.27277081503951467, "grad_norm": 0.37631585652625604, "learning_rate": 1.7065123677898357e-05, "loss": 0.154, "step": 3417 }, { "epoch": 0.2728506426119582, "grad_norm": 0.32517316842788746, "learning_rate": 1.706329370982033e-05, "loss": 0.1547, "step": 3418 }, { "epoch": 0.2729304701844017, "grad_norm": 0.3070692138659221, "learning_rate": 1.706146326958907e-05, "loss": 0.2119, "step": 3419 }, { "epoch": 0.2730102977568452, "grad_norm": 0.41041449303116107, "learning_rate": 1.7059632357326937e-05, "loss": 0.1806, "step": 3420 }, { "epoch": 0.2730901253292887, "grad_norm": 0.3572117064553441, "learning_rate": 1.7057800973156317e-05, "loss": 0.2262, "step": 3421 }, { "epoch": 0.27316995290173224, "grad_norm": 0.3611685925574058, "learning_rate": 1.7055969117199635e-05, "loss": 0.1964, "step": 3422 }, { "epoch": 0.2732497804741758, "grad_norm": 0.40469919757915046, "learning_rate": 1.7054136789579338e-05, "loss": 0.1969, "step": 3423 }, { "epoch": 0.2733296080466193, "grad_norm": 0.3020977252165238, "learning_rate": 1.7052303990417916e-05, "loss": 0.1948, "step": 3424 }, { "epoch": 0.27340943561906283, "grad_norm": 0.322296366252246, "learning_rate": 1.705047071983788e-05, "loss": 0.2013, "step": 3425 }, { "epoch": 0.27348926319150635, "grad_norm": 0.25625764705165543, "learning_rate": 1.7048636977961774e-05, "loss": 0.1976, "step": 3426 }, { "epoch": 0.27356909076394986, "grad_norm": 0.3861120455227489, "learning_rate": 1.7046802764912188e-05, "loss": 0.1969, "step": 3427 }, { "epoch": 0.2736489183363934, "grad_norm": 0.278083739804955, "learning_rate": 1.704496808081172e-05, "loss": 0.2207, "step": 3428 }, { "epoch": 0.2737287459088369, "grad_norm": 0.31049032685794875, "learning_rate": 1.7043132925783017e-05, "loss": 0.2094, "step": 3429 }, { "epoch": 0.27380857348128046, "grad_norm": 0.2802867539150717, "learning_rate": 1.704129729994875e-05, "loss": 0.1917, "step": 3430 }, { "epoch": 0.273888401053724, "grad_norm": 0.32152678837479937, "learning_rate": 1.7039461203431623e-05, "loss": 0.189, "step": 3431 }, { "epoch": 0.2739682286261675, "grad_norm": 0.28930879113844554, "learning_rate": 1.703762463635437e-05, "loss": 0.2142, "step": 3432 }, { "epoch": 0.274048056198611, "grad_norm": 0.3055724381575453, "learning_rate": 1.7035787598839766e-05, "loss": 0.1717, "step": 3433 }, { "epoch": 0.2741278837710545, "grad_norm": 0.32993944861645486, "learning_rate": 1.7033950091010603e-05, "loss": 0.2198, "step": 3434 }, { "epoch": 0.27420771134349803, "grad_norm": 0.39809006357796806, "learning_rate": 1.7032112112989715e-05, "loss": 0.2179, "step": 3435 }, { "epoch": 0.27428753891594154, "grad_norm": 0.4220994052845089, "learning_rate": 1.7030273664899957e-05, "loss": 0.205, "step": 3436 }, { "epoch": 0.2743673664883851, "grad_norm": 0.3198222961625297, "learning_rate": 1.7028434746864228e-05, "loss": 0.209, "step": 3437 }, { "epoch": 0.2744471940608286, "grad_norm": 0.2942704937858829, "learning_rate": 1.7026595359005452e-05, "loss": 0.2551, "step": 3438 }, { "epoch": 0.27452702163327214, "grad_norm": 0.37364336046389485, "learning_rate": 1.7024755501446578e-05, "loss": 0.1689, "step": 3439 }, { "epoch": 0.27460684920571565, "grad_norm": 0.34784396163860376, "learning_rate": 1.7022915174310606e-05, "loss": 0.1955, "step": 3440 }, { "epoch": 0.27468667677815917, "grad_norm": 0.3106253603902221, "learning_rate": 1.702107437772054e-05, "loss": 0.161, "step": 3441 }, { "epoch": 0.2747665043506027, "grad_norm": 0.2832970725744624, "learning_rate": 1.7019233111799443e-05, "loss": 0.1872, "step": 3442 }, { "epoch": 0.2748463319230462, "grad_norm": 0.2998797438762178, "learning_rate": 1.701739137667039e-05, "loss": 0.2166, "step": 3443 }, { "epoch": 0.27492615949548976, "grad_norm": 0.28263997341065294, "learning_rate": 1.701554917245649e-05, "loss": 0.2099, "step": 3444 }, { "epoch": 0.2750059870679333, "grad_norm": 0.3218070088475507, "learning_rate": 1.7013706499280897e-05, "loss": 0.1983, "step": 3445 }, { "epoch": 0.2750858146403768, "grad_norm": 0.2921864639762163, "learning_rate": 1.701186335726678e-05, "loss": 0.1735, "step": 3446 }, { "epoch": 0.2751656422128203, "grad_norm": 0.34946126873319494, "learning_rate": 1.7010019746537343e-05, "loss": 0.1906, "step": 3447 }, { "epoch": 0.2752454697852638, "grad_norm": 0.3159938596317303, "learning_rate": 1.700817566721583e-05, "loss": 0.2098, "step": 3448 }, { "epoch": 0.27532529735770733, "grad_norm": 0.3548436628881707, "learning_rate": 1.7006331119425504e-05, "loss": 0.187, "step": 3449 }, { "epoch": 0.27540512493015085, "grad_norm": 0.3207113067467507, "learning_rate": 1.7004486103289676e-05, "loss": 0.1735, "step": 3450 }, { "epoch": 0.2754849525025944, "grad_norm": 0.3156880004536362, "learning_rate": 1.700264061893167e-05, "loss": 0.2271, "step": 3451 }, { "epoch": 0.27556478007503793, "grad_norm": 0.3790192448150453, "learning_rate": 1.7000794666474846e-05, "loss": 0.1443, "step": 3452 }, { "epoch": 0.27564460764748144, "grad_norm": 0.3635796330642722, "learning_rate": 1.699894824604261e-05, "loss": 0.2102, "step": 3453 }, { "epoch": 0.27572443521992496, "grad_norm": 0.27414111468403707, "learning_rate": 1.6997101357758375e-05, "loss": 0.2164, "step": 3454 }, { "epoch": 0.27580426279236847, "grad_norm": 0.27282407311548296, "learning_rate": 1.699525400174561e-05, "loss": 0.2245, "step": 3455 }, { "epoch": 0.275884090364812, "grad_norm": 0.3395271596437306, "learning_rate": 1.6993406178127795e-05, "loss": 0.1965, "step": 3456 }, { "epoch": 0.27596391793725555, "grad_norm": 0.3534269649881025, "learning_rate": 1.6991557887028455e-05, "loss": 0.1688, "step": 3457 }, { "epoch": 0.27604374550969907, "grad_norm": 0.33125262734078925, "learning_rate": 1.698970912857114e-05, "loss": 0.1987, "step": 3458 }, { "epoch": 0.2761235730821426, "grad_norm": 0.3593587474006748, "learning_rate": 1.698785990287943e-05, "loss": 0.1631, "step": 3459 }, { "epoch": 0.2762034006545861, "grad_norm": 0.3584572754100424, "learning_rate": 1.6986010210076934e-05, "loss": 0.2396, "step": 3460 }, { "epoch": 0.2762832282270296, "grad_norm": 0.3088924460677412, "learning_rate": 1.698416005028731e-05, "loss": 0.1996, "step": 3461 }, { "epoch": 0.2763630557994731, "grad_norm": 0.3458832186903928, "learning_rate": 1.698230942363422e-05, "loss": 0.1894, "step": 3462 }, { "epoch": 0.27644288337191664, "grad_norm": 0.3961213106682023, "learning_rate": 1.698045833024138e-05, "loss": 0.1752, "step": 3463 }, { "epoch": 0.2765227109443602, "grad_norm": 0.3253784500076669, "learning_rate": 1.6978606770232528e-05, "loss": 0.185, "step": 3464 }, { "epoch": 0.2766025385168037, "grad_norm": 0.3371672492652073, "learning_rate": 1.6976754743731425e-05, "loss": 0.206, "step": 3465 }, { "epoch": 0.27668236608924723, "grad_norm": 0.2956327713101839, "learning_rate": 1.6974902250861883e-05, "loss": 0.2221, "step": 3466 }, { "epoch": 0.27676219366169075, "grad_norm": 0.31540343072610966, "learning_rate": 1.6973049291747724e-05, "loss": 0.1718, "step": 3467 }, { "epoch": 0.27684202123413426, "grad_norm": 0.29937180075408487, "learning_rate": 1.6971195866512812e-05, "loss": 0.2212, "step": 3468 }, { "epoch": 0.2769218488065778, "grad_norm": 0.32762128527155115, "learning_rate": 1.696934197528105e-05, "loss": 0.1622, "step": 3469 }, { "epoch": 0.2770016763790213, "grad_norm": 0.3109167480960106, "learning_rate": 1.696748761817636e-05, "loss": 0.1414, "step": 3470 }, { "epoch": 0.27708150395146486, "grad_norm": 0.3277858362454786, "learning_rate": 1.6965632795322687e-05, "loss": 0.2368, "step": 3471 }, { "epoch": 0.27716133152390837, "grad_norm": 0.28802369118009336, "learning_rate": 1.6963777506844037e-05, "loss": 0.2008, "step": 3472 }, { "epoch": 0.2772411590963519, "grad_norm": 0.28806548402531984, "learning_rate": 1.6961921752864414e-05, "loss": 0.1987, "step": 3473 }, { "epoch": 0.2773209866687954, "grad_norm": 0.30407892340952897, "learning_rate": 1.696006553350787e-05, "loss": 0.2046, "step": 3474 }, { "epoch": 0.2774008142412389, "grad_norm": 0.3308663578565587, "learning_rate": 1.6958208848898495e-05, "loss": 0.1853, "step": 3475 }, { "epoch": 0.2774806418136824, "grad_norm": 0.3891489559628248, "learning_rate": 1.6956351699160394e-05, "loss": 0.2355, "step": 3476 }, { "epoch": 0.27756046938612594, "grad_norm": 0.37183669880834685, "learning_rate": 1.695449408441771e-05, "loss": 0.2487, "step": 3477 }, { "epoch": 0.2776402969585695, "grad_norm": 0.34894604162647996, "learning_rate": 1.695263600479462e-05, "loss": 0.2025, "step": 3478 }, { "epoch": 0.277720124531013, "grad_norm": 0.3129411307950474, "learning_rate": 1.6950777460415324e-05, "loss": 0.1914, "step": 3479 }, { "epoch": 0.27779995210345654, "grad_norm": 0.33660383340950395, "learning_rate": 1.6948918451404063e-05, "loss": 0.1979, "step": 3480 }, { "epoch": 0.27787977967590005, "grad_norm": 0.34970939235312254, "learning_rate": 1.694705897788511e-05, "loss": 0.2261, "step": 3481 }, { "epoch": 0.27795960724834357, "grad_norm": 0.36625605343703094, "learning_rate": 1.6945199039982746e-05, "loss": 0.2103, "step": 3482 }, { "epoch": 0.2780394348207871, "grad_norm": 0.35928851461406064, "learning_rate": 1.6943338637821317e-05, "loss": 0.199, "step": 3483 }, { "epoch": 0.27811926239323065, "grad_norm": 0.33128061345446047, "learning_rate": 1.6941477771525175e-05, "loss": 0.172, "step": 3484 }, { "epoch": 0.27819908996567416, "grad_norm": 0.3814540669941063, "learning_rate": 1.6939616441218717e-05, "loss": 0.2112, "step": 3485 }, { "epoch": 0.2782789175381177, "grad_norm": 0.36392607738879695, "learning_rate": 1.6937754647026364e-05, "loss": 0.1784, "step": 3486 }, { "epoch": 0.2783587451105612, "grad_norm": 0.32549266626637197, "learning_rate": 1.6935892389072566e-05, "loss": 0.2066, "step": 3487 }, { "epoch": 0.2784385726830047, "grad_norm": 0.2802160142385672, "learning_rate": 1.6934029667481814e-05, "loss": 0.2156, "step": 3488 }, { "epoch": 0.2785184002554482, "grad_norm": 0.3545051928480611, "learning_rate": 1.6932166482378613e-05, "loss": 0.1359, "step": 3489 }, { "epoch": 0.27859822782789173, "grad_norm": 0.3518549105410817, "learning_rate": 1.693030283388752e-05, "loss": 0.1689, "step": 3490 }, { "epoch": 0.2786780554003353, "grad_norm": 0.3358848889340386, "learning_rate": 1.692843872213311e-05, "loss": 0.1943, "step": 3491 }, { "epoch": 0.2787578829727788, "grad_norm": 0.33551790489375755, "learning_rate": 1.692657414723999e-05, "loss": 0.1775, "step": 3492 }, { "epoch": 0.27883771054522233, "grad_norm": 0.3187051099626498, "learning_rate": 1.6924709109332796e-05, "loss": 0.214, "step": 3493 }, { "epoch": 0.27891753811766584, "grad_norm": 0.25591735774743546, "learning_rate": 1.6922843608536202e-05, "loss": 0.1949, "step": 3494 }, { "epoch": 0.27899736569010936, "grad_norm": 0.2687307502220679, "learning_rate": 1.6920977644974915e-05, "loss": 0.1958, "step": 3495 }, { "epoch": 0.27907719326255287, "grad_norm": 0.3763759399216303, "learning_rate": 1.6919111218773657e-05, "loss": 0.2287, "step": 3496 }, { "epoch": 0.2791570208349964, "grad_norm": 0.4187002618889006, "learning_rate": 1.6917244330057198e-05, "loss": 0.2037, "step": 3497 }, { "epoch": 0.27923684840743995, "grad_norm": 0.2570090071963721, "learning_rate": 1.6915376978950328e-05, "loss": 0.1735, "step": 3498 }, { "epoch": 0.27931667597988347, "grad_norm": 0.3092935558608146, "learning_rate": 1.6913509165577876e-05, "loss": 0.2037, "step": 3499 }, { "epoch": 0.279396503552327, "grad_norm": 0.4005114119548686, "learning_rate": 1.6911640890064693e-05, "loss": 0.2155, "step": 3500 }, { "epoch": 0.2794763311247705, "grad_norm": 0.32891728505113194, "learning_rate": 1.6909772152535674e-05, "loss": 0.2184, "step": 3501 }, { "epoch": 0.279556158697214, "grad_norm": 0.3090117038814398, "learning_rate": 1.6907902953115726e-05, "loss": 0.1817, "step": 3502 }, { "epoch": 0.2796359862696575, "grad_norm": 0.3765801316956214, "learning_rate": 1.6906033291929804e-05, "loss": 0.1586, "step": 3503 }, { "epoch": 0.27971581384210104, "grad_norm": 0.33376618772156197, "learning_rate": 1.690416316910289e-05, "loss": 0.2247, "step": 3504 }, { "epoch": 0.2797956414145446, "grad_norm": 0.347560905809771, "learning_rate": 1.690229258475999e-05, "loss": 0.246, "step": 3505 }, { "epoch": 0.2798754689869881, "grad_norm": 0.30662860719241725, "learning_rate": 1.6900421539026142e-05, "loss": 0.1895, "step": 3506 }, { "epoch": 0.27995529655943163, "grad_norm": 0.281509619829362, "learning_rate": 1.6898550032026425e-05, "loss": 0.1619, "step": 3507 }, { "epoch": 0.28003512413187515, "grad_norm": 0.3416297752555422, "learning_rate": 1.689667806388594e-05, "loss": 0.2025, "step": 3508 }, { "epoch": 0.28011495170431866, "grad_norm": 0.34620256234654, "learning_rate": 1.6894805634729817e-05, "loss": 0.2149, "step": 3509 }, { "epoch": 0.2801947792767622, "grad_norm": 0.36356080972879407, "learning_rate": 1.6892932744683225e-05, "loss": 0.1561, "step": 3510 }, { "epoch": 0.28027460684920574, "grad_norm": 0.32034946519401986, "learning_rate": 1.6891059393871357e-05, "loss": 0.2139, "step": 3511 }, { "epoch": 0.28035443442164926, "grad_norm": 0.4653674598420624, "learning_rate": 1.6889185582419438e-05, "loss": 0.1818, "step": 3512 }, { "epoch": 0.28043426199409277, "grad_norm": 0.3871305365280683, "learning_rate": 1.6887311310452727e-05, "loss": 0.1893, "step": 3513 }, { "epoch": 0.2805140895665363, "grad_norm": 0.347837378293963, "learning_rate": 1.688543657809651e-05, "loss": 0.2021, "step": 3514 }, { "epoch": 0.2805939171389798, "grad_norm": 0.3526964229060104, "learning_rate": 1.6883561385476107e-05, "loss": 0.1721, "step": 3515 }, { "epoch": 0.2806737447114233, "grad_norm": 0.2830818626968381, "learning_rate": 1.688168573271687e-05, "loss": 0.1895, "step": 3516 }, { "epoch": 0.2807535722838668, "grad_norm": 0.34849999547955707, "learning_rate": 1.687980961994417e-05, "loss": 0.1653, "step": 3517 }, { "epoch": 0.2808333998563104, "grad_norm": 0.2960752690047005, "learning_rate": 1.6877933047283426e-05, "loss": 0.1927, "step": 3518 }, { "epoch": 0.2809132274287539, "grad_norm": 0.2959760073153885, "learning_rate": 1.6876056014860074e-05, "loss": 0.1804, "step": 3519 }, { "epoch": 0.2809930550011974, "grad_norm": 0.294287174600071, "learning_rate": 1.6874178522799592e-05, "loss": 0.2509, "step": 3520 }, { "epoch": 0.28107288257364094, "grad_norm": 0.28617599114785897, "learning_rate": 1.687230057122748e-05, "loss": 0.1768, "step": 3521 }, { "epoch": 0.28115271014608445, "grad_norm": 0.31887276014803556, "learning_rate": 1.687042216026927e-05, "loss": 0.1659, "step": 3522 }, { "epoch": 0.28123253771852796, "grad_norm": 0.2716503130372492, "learning_rate": 1.6868543290050526e-05, "loss": 0.1751, "step": 3523 }, { "epoch": 0.2813123652909715, "grad_norm": 0.3384148441940099, "learning_rate": 1.686666396069685e-05, "loss": 0.1852, "step": 3524 }, { "epoch": 0.28139219286341505, "grad_norm": 0.3337369500930098, "learning_rate": 1.6864784172333855e-05, "loss": 0.2125, "step": 3525 }, { "epoch": 0.28147202043585856, "grad_norm": 0.3127968729497578, "learning_rate": 1.6862903925087212e-05, "loss": 0.2033, "step": 3526 }, { "epoch": 0.2815518480083021, "grad_norm": 0.3683005074004469, "learning_rate": 1.6861023219082598e-05, "loss": 0.1939, "step": 3527 }, { "epoch": 0.2816316755807456, "grad_norm": 0.30245309348556526, "learning_rate": 1.6859142054445734e-05, "loss": 0.2121, "step": 3528 }, { "epoch": 0.2817115031531891, "grad_norm": 0.3223133019179437, "learning_rate": 1.6857260431302366e-05, "loss": 0.1905, "step": 3529 }, { "epoch": 0.2817913307256326, "grad_norm": 0.324783878331051, "learning_rate": 1.685537834977828e-05, "loss": 0.123, "step": 3530 }, { "epoch": 0.28187115829807613, "grad_norm": 0.35035680954675646, "learning_rate": 1.6853495809999278e-05, "loss": 0.1767, "step": 3531 }, { "epoch": 0.2819509858705197, "grad_norm": 0.2942910280139362, "learning_rate": 1.68516128120912e-05, "loss": 0.2293, "step": 3532 }, { "epoch": 0.2820308134429632, "grad_norm": 0.3142171256536708, "learning_rate": 1.6849729356179928e-05, "loss": 0.2192, "step": 3533 }, { "epoch": 0.2821106410154067, "grad_norm": 0.31031610267179077, "learning_rate": 1.684784544239135e-05, "loss": 0.2171, "step": 3534 }, { "epoch": 0.28219046858785024, "grad_norm": 0.33644351897766384, "learning_rate": 1.6845961070851403e-05, "loss": 0.1885, "step": 3535 }, { "epoch": 0.28227029616029375, "grad_norm": 0.31142450456156734, "learning_rate": 1.6844076241686057e-05, "loss": 0.1959, "step": 3536 }, { "epoch": 0.28235012373273727, "grad_norm": 0.27161352849427706, "learning_rate": 1.6842190955021295e-05, "loss": 0.2114, "step": 3537 }, { "epoch": 0.2824299513051808, "grad_norm": 0.30140692297167965, "learning_rate": 1.6840305210983143e-05, "loss": 0.1778, "step": 3538 }, { "epoch": 0.28250977887762435, "grad_norm": 0.35110118445061433, "learning_rate": 1.6838419009697663e-05, "loss": 0.2173, "step": 3539 }, { "epoch": 0.28258960645006787, "grad_norm": 0.285504465204286, "learning_rate": 1.6836532351290934e-05, "loss": 0.1945, "step": 3540 }, { "epoch": 0.2826694340225114, "grad_norm": 0.3016208305221153, "learning_rate": 1.683464523588907e-05, "loss": 0.1838, "step": 3541 }, { "epoch": 0.2827492615949549, "grad_norm": 0.32545657665498695, "learning_rate": 1.6832757663618217e-05, "loss": 0.1825, "step": 3542 }, { "epoch": 0.2828290891673984, "grad_norm": 0.30797758532653535, "learning_rate": 1.683086963460456e-05, "loss": 0.1778, "step": 3543 }, { "epoch": 0.2829089167398419, "grad_norm": 0.35113746217141606, "learning_rate": 1.6828981148974294e-05, "loss": 0.1912, "step": 3544 }, { "epoch": 0.2829887443122855, "grad_norm": 0.34499627101081237, "learning_rate": 1.6827092206853667e-05, "loss": 0.224, "step": 3545 }, { "epoch": 0.283068571884729, "grad_norm": 0.33176538370989206, "learning_rate": 1.6825202808368944e-05, "loss": 0.2178, "step": 3546 }, { "epoch": 0.2831483994571725, "grad_norm": 0.3045829754806731, "learning_rate": 1.6823312953646424e-05, "loss": 0.1736, "step": 3547 }, { "epoch": 0.28322822702961603, "grad_norm": 0.3067735356040461, "learning_rate": 1.6821422642812434e-05, "loss": 0.2506, "step": 3548 }, { "epoch": 0.28330805460205954, "grad_norm": 0.36199104164143253, "learning_rate": 1.6819531875993338e-05, "loss": 0.2046, "step": 3549 }, { "epoch": 0.28338788217450306, "grad_norm": 0.2673297969768724, "learning_rate": 1.6817640653315522e-05, "loss": 0.1664, "step": 3550 }, { "epoch": 0.2834677097469466, "grad_norm": 0.28833293062770415, "learning_rate": 1.6815748974905412e-05, "loss": 0.2104, "step": 3551 }, { "epoch": 0.28354753731939014, "grad_norm": 0.35917924339580576, "learning_rate": 1.681385684088945e-05, "loss": 0.1492, "step": 3552 }, { "epoch": 0.28362736489183366, "grad_norm": 0.2735488295891215, "learning_rate": 1.6811964251394126e-05, "loss": 0.2669, "step": 3553 }, { "epoch": 0.28370719246427717, "grad_norm": 0.3179773886630065, "learning_rate": 1.681007120654595e-05, "loss": 0.2347, "step": 3554 }, { "epoch": 0.2837870200367207, "grad_norm": 0.31599516791403465, "learning_rate": 1.6808177706471465e-05, "loss": 0.1683, "step": 3555 }, { "epoch": 0.2838668476091642, "grad_norm": 0.3810187254027419, "learning_rate": 1.6806283751297244e-05, "loss": 0.223, "step": 3556 }, { "epoch": 0.2839466751816077, "grad_norm": 0.3037141267753393, "learning_rate": 1.6804389341149886e-05, "loss": 0.1958, "step": 3557 }, { "epoch": 0.2840265027540512, "grad_norm": 0.3606440778502727, "learning_rate": 1.680249447615603e-05, "loss": 0.1932, "step": 3558 }, { "epoch": 0.2841063303264948, "grad_norm": 0.3168751253277819, "learning_rate": 1.6800599156442337e-05, "loss": 0.1789, "step": 3559 }, { "epoch": 0.2841861578989383, "grad_norm": 0.2775139369333828, "learning_rate": 1.6798703382135506e-05, "loss": 0.1407, "step": 3560 }, { "epoch": 0.2842659854713818, "grad_norm": 0.3662116991598532, "learning_rate": 1.679680715336226e-05, "loss": 0.1863, "step": 3561 }, { "epoch": 0.28434581304382534, "grad_norm": 0.2715414994159255, "learning_rate": 1.6794910470249353e-05, "loss": 0.1976, "step": 3562 }, { "epoch": 0.28442564061626885, "grad_norm": 0.2826708577394512, "learning_rate": 1.6793013332923568e-05, "loss": 0.2002, "step": 3563 }, { "epoch": 0.28450546818871236, "grad_norm": 0.27577184378683467, "learning_rate": 1.6791115741511728e-05, "loss": 0.1828, "step": 3564 }, { "epoch": 0.2845852957611559, "grad_norm": 0.3098770740219353, "learning_rate": 1.6789217696140673e-05, "loss": 0.1993, "step": 3565 }, { "epoch": 0.28466512333359945, "grad_norm": 0.28921753046159693, "learning_rate": 1.6787319196937288e-05, "loss": 0.2052, "step": 3566 }, { "epoch": 0.28474495090604296, "grad_norm": 0.29688121024539976, "learning_rate": 1.678542024402847e-05, "loss": 0.195, "step": 3567 }, { "epoch": 0.2848247784784865, "grad_norm": 0.2660548916847874, "learning_rate": 1.678352083754116e-05, "loss": 0.1646, "step": 3568 }, { "epoch": 0.28490460605093, "grad_norm": 0.26263426344403346, "learning_rate": 1.6781620977602334e-05, "loss": 0.1519, "step": 3569 }, { "epoch": 0.2849844336233735, "grad_norm": 0.3640787794386801, "learning_rate": 1.677972066433898e-05, "loss": 0.2222, "step": 3570 }, { "epoch": 0.285064261195817, "grad_norm": 0.2786195276244239, "learning_rate": 1.677781989787813e-05, "loss": 0.1752, "step": 3571 }, { "epoch": 0.2851440887682606, "grad_norm": 0.28893730574181986, "learning_rate": 1.677591867834684e-05, "loss": 0.1843, "step": 3572 }, { "epoch": 0.2852239163407041, "grad_norm": 0.2745757211150174, "learning_rate": 1.6774017005872204e-05, "loss": 0.2041, "step": 3573 }, { "epoch": 0.2853037439131476, "grad_norm": 0.39652731837557254, "learning_rate": 1.6772114880581337e-05, "loss": 0.2144, "step": 3574 }, { "epoch": 0.2853835714855911, "grad_norm": 0.28242088314038255, "learning_rate": 1.6770212302601392e-05, "loss": 0.1783, "step": 3575 }, { "epoch": 0.28546339905803464, "grad_norm": 0.2668670710015148, "learning_rate": 1.676830927205955e-05, "loss": 0.1693, "step": 3576 }, { "epoch": 0.28554322663047815, "grad_norm": 0.32013106599332336, "learning_rate": 1.6766405789083015e-05, "loss": 0.2072, "step": 3577 }, { "epoch": 0.28562305420292167, "grad_norm": 0.3088719756321276, "learning_rate": 1.676450185379903e-05, "loss": 0.1909, "step": 3578 }, { "epoch": 0.28570288177536524, "grad_norm": 0.3425335604088447, "learning_rate": 1.6762597466334866e-05, "loss": 0.2103, "step": 3579 }, { "epoch": 0.28578270934780875, "grad_norm": 0.36203859277573164, "learning_rate": 1.6760692626817823e-05, "loss": 0.2027, "step": 3580 }, { "epoch": 0.28586253692025226, "grad_norm": 0.34829913574996174, "learning_rate": 1.675878733537524e-05, "loss": 0.1838, "step": 3581 }, { "epoch": 0.2859423644926958, "grad_norm": 0.27531535897409676, "learning_rate": 1.6756881592134462e-05, "loss": 0.2011, "step": 3582 }, { "epoch": 0.2860221920651393, "grad_norm": 0.33096393619181513, "learning_rate": 1.6754975397222893e-05, "loss": 0.1984, "step": 3583 }, { "epoch": 0.2861020196375828, "grad_norm": 0.3819691845956637, "learning_rate": 1.675306875076795e-05, "loss": 0.2005, "step": 3584 }, { "epoch": 0.2861818472100263, "grad_norm": 0.3126512378448573, "learning_rate": 1.675116165289709e-05, "loss": 0.2321, "step": 3585 }, { "epoch": 0.2862616747824699, "grad_norm": 0.32924007345878625, "learning_rate": 1.6749254103737785e-05, "loss": 0.1689, "step": 3586 }, { "epoch": 0.2863415023549134, "grad_norm": 0.37008722406970485, "learning_rate": 1.6747346103417553e-05, "loss": 0.1902, "step": 3587 }, { "epoch": 0.2864213299273569, "grad_norm": 0.38841373060826606, "learning_rate": 1.674543765206394e-05, "loss": 0.1899, "step": 3588 }, { "epoch": 0.28650115749980043, "grad_norm": 0.31890865607140145, "learning_rate": 1.6743528749804512e-05, "loss": 0.1657, "step": 3589 }, { "epoch": 0.28658098507224394, "grad_norm": 0.2688204264818432, "learning_rate": 1.6741619396766876e-05, "loss": 0.2222, "step": 3590 }, { "epoch": 0.28666081264468746, "grad_norm": 0.35312186378811616, "learning_rate": 1.673970959307866e-05, "loss": 0.2269, "step": 3591 }, { "epoch": 0.28674064021713097, "grad_norm": 0.3810470685935724, "learning_rate": 1.673779933886753e-05, "loss": 0.1824, "step": 3592 }, { "epoch": 0.28682046778957454, "grad_norm": 0.34176975120574127, "learning_rate": 1.673588863426118e-05, "loss": 0.2371, "step": 3593 }, { "epoch": 0.28690029536201805, "grad_norm": 0.26090489713223014, "learning_rate": 1.6733977479387332e-05, "loss": 0.1874, "step": 3594 }, { "epoch": 0.28698012293446157, "grad_norm": 0.3201895106814237, "learning_rate": 1.6732065874373737e-05, "loss": 0.2086, "step": 3595 }, { "epoch": 0.2870599505069051, "grad_norm": 0.42808917665735025, "learning_rate": 1.673015381934818e-05, "loss": 0.1966, "step": 3596 }, { "epoch": 0.2871397780793486, "grad_norm": 0.3675346060492654, "learning_rate": 1.6728241314438473e-05, "loss": 0.2041, "step": 3597 }, { "epoch": 0.2872196056517921, "grad_norm": 0.3498074487038911, "learning_rate": 1.6726328359772463e-05, "loss": 0.2096, "step": 3598 }, { "epoch": 0.2872994332242357, "grad_norm": 0.322115852295183, "learning_rate": 1.6724414955478027e-05, "loss": 0.1525, "step": 3599 }, { "epoch": 0.2873792607966792, "grad_norm": 0.3231384437569135, "learning_rate": 1.6722501101683053e-05, "loss": 0.1968, "step": 3600 }, { "epoch": 0.2874590883691227, "grad_norm": 0.33848811873992274, "learning_rate": 1.672058679851549e-05, "loss": 0.2785, "step": 3601 }, { "epoch": 0.2875389159415662, "grad_norm": 0.3126700418409733, "learning_rate": 1.671867204610329e-05, "loss": 0.1851, "step": 3602 }, { "epoch": 0.28761874351400973, "grad_norm": 0.31490551603232037, "learning_rate": 1.6716756844574456e-05, "loss": 0.192, "step": 3603 }, { "epoch": 0.28769857108645325, "grad_norm": 0.29984111241565803, "learning_rate": 1.6714841194057007e-05, "loss": 0.1949, "step": 3604 }, { "epoch": 0.28777839865889676, "grad_norm": 0.3759549141623494, "learning_rate": 1.6712925094678997e-05, "loss": 0.2271, "step": 3605 }, { "epoch": 0.28785822623134033, "grad_norm": 0.34852074199559435, "learning_rate": 1.6711008546568512e-05, "loss": 0.1677, "step": 3606 }, { "epoch": 0.28793805380378384, "grad_norm": 0.34840366820646484, "learning_rate": 1.6709091549853665e-05, "loss": 0.1572, "step": 3607 }, { "epoch": 0.28801788137622736, "grad_norm": 0.3249586675463096, "learning_rate": 1.6707174104662597e-05, "loss": 0.2428, "step": 3608 }, { "epoch": 0.28809770894867087, "grad_norm": 0.3607999154573019, "learning_rate": 1.670525621112348e-05, "loss": 0.1955, "step": 3609 }, { "epoch": 0.2881775365211144, "grad_norm": 0.29288238286205365, "learning_rate": 1.6703337869364525e-05, "loss": 0.1627, "step": 3610 }, { "epoch": 0.2882573640935579, "grad_norm": 0.32862153941466743, "learning_rate": 1.6701419079513962e-05, "loss": 0.1854, "step": 3611 }, { "epoch": 0.2883371916660014, "grad_norm": 0.41245602859470504, "learning_rate": 1.669949984170005e-05, "loss": 0.2137, "step": 3612 }, { "epoch": 0.288417019238445, "grad_norm": 0.30824377308218726, "learning_rate": 1.669758015605109e-05, "loss": 0.1791, "step": 3613 }, { "epoch": 0.2884968468108885, "grad_norm": 0.31291639379473823, "learning_rate": 1.6695660022695398e-05, "loss": 0.1663, "step": 3614 }, { "epoch": 0.288576674383332, "grad_norm": 0.3049212941975992, "learning_rate": 1.6693739441761335e-05, "loss": 0.214, "step": 3615 }, { "epoch": 0.2886565019557755, "grad_norm": 0.3145022839705392, "learning_rate": 1.669181841337728e-05, "loss": 0.2108, "step": 3616 }, { "epoch": 0.28873632952821904, "grad_norm": 0.3049294811484566, "learning_rate": 1.6689896937671642e-05, "loss": 0.1884, "step": 3617 }, { "epoch": 0.28881615710066255, "grad_norm": 0.31601817988188696, "learning_rate": 1.6687975014772873e-05, "loss": 0.1588, "step": 3618 }, { "epoch": 0.28889598467310607, "grad_norm": 0.289217541042384, "learning_rate": 1.668605264480944e-05, "loss": 0.1718, "step": 3619 }, { "epoch": 0.28897581224554963, "grad_norm": 0.2655867661126377, "learning_rate": 1.6684129827909848e-05, "loss": 0.1887, "step": 3620 }, { "epoch": 0.28905563981799315, "grad_norm": 0.32044811439917203, "learning_rate": 1.6682206564202626e-05, "loss": 0.1915, "step": 3621 }, { "epoch": 0.28913546739043666, "grad_norm": 0.3242035137694959, "learning_rate": 1.6680282853816344e-05, "loss": 0.1752, "step": 3622 }, { "epoch": 0.2892152949628802, "grad_norm": 0.4013734615903811, "learning_rate": 1.6678358696879587e-05, "loss": 0.2088, "step": 3623 }, { "epoch": 0.2892951225353237, "grad_norm": 0.36747100581578046, "learning_rate": 1.667643409352098e-05, "loss": 0.1913, "step": 3624 }, { "epoch": 0.2893749501077672, "grad_norm": 0.32205299075836946, "learning_rate": 1.6674509043869177e-05, "loss": 0.221, "step": 3625 }, { "epoch": 0.2894547776802107, "grad_norm": 0.3157891676955509, "learning_rate": 1.6672583548052855e-05, "loss": 0.2564, "step": 3626 }, { "epoch": 0.2895346052526543, "grad_norm": 0.2941375712181084, "learning_rate": 1.6670657606200733e-05, "loss": 0.2445, "step": 3627 }, { "epoch": 0.2896144328250978, "grad_norm": 0.3318579972543417, "learning_rate": 1.6668731218441547e-05, "loss": 0.1762, "step": 3628 }, { "epoch": 0.2896942603975413, "grad_norm": 0.3585084312757111, "learning_rate": 1.666680438490407e-05, "loss": 0.1623, "step": 3629 }, { "epoch": 0.28977408796998483, "grad_norm": 0.3800051604802108, "learning_rate": 1.6664877105717102e-05, "loss": 0.1627, "step": 3630 }, { "epoch": 0.28985391554242834, "grad_norm": 0.29773301865604473, "learning_rate": 1.6662949381009477e-05, "loss": 0.178, "step": 3631 }, { "epoch": 0.28993374311487186, "grad_norm": 0.26731899667652514, "learning_rate": 1.6661021210910055e-05, "loss": 0.2095, "step": 3632 }, { "epoch": 0.2900135706873154, "grad_norm": 0.28807932846751483, "learning_rate": 1.6659092595547723e-05, "loss": 0.1771, "step": 3633 }, { "epoch": 0.29009339825975894, "grad_norm": 0.2900875406367519, "learning_rate": 1.6657163535051403e-05, "loss": 0.227, "step": 3634 }, { "epoch": 0.29017322583220245, "grad_norm": 0.321289550609678, "learning_rate": 1.6655234029550048e-05, "loss": 0.18, "step": 3635 }, { "epoch": 0.29025305340464597, "grad_norm": 0.3118513484150336, "learning_rate": 1.6653304079172637e-05, "loss": 0.1691, "step": 3636 }, { "epoch": 0.2903328809770895, "grad_norm": 0.31272033223289925, "learning_rate": 1.6651373684048176e-05, "loss": 0.2099, "step": 3637 }, { "epoch": 0.290412708549533, "grad_norm": 0.29067104651251335, "learning_rate": 1.6649442844305706e-05, "loss": 0.1857, "step": 3638 }, { "epoch": 0.2904925361219765, "grad_norm": 0.3059045924212901, "learning_rate": 1.6647511560074295e-05, "loss": 0.1811, "step": 3639 }, { "epoch": 0.2905723636944201, "grad_norm": 0.2660184955990461, "learning_rate": 1.6645579831483047e-05, "loss": 0.2236, "step": 3640 }, { "epoch": 0.2906521912668636, "grad_norm": 0.28783636599682977, "learning_rate": 1.6643647658661086e-05, "loss": 0.1852, "step": 3641 }, { "epoch": 0.2907320188393071, "grad_norm": 0.33342801573730624, "learning_rate": 1.664171504173757e-05, "loss": 0.1864, "step": 3642 }, { "epoch": 0.2908118464117506, "grad_norm": 0.31240415530915, "learning_rate": 1.663978198084169e-05, "loss": 0.2139, "step": 3643 }, { "epoch": 0.29089167398419413, "grad_norm": 0.2640397213542025, "learning_rate": 1.6637848476102657e-05, "loss": 0.1843, "step": 3644 }, { "epoch": 0.29097150155663765, "grad_norm": 0.3688945388496152, "learning_rate": 1.6635914527649724e-05, "loss": 0.2223, "step": 3645 }, { "epoch": 0.29105132912908116, "grad_norm": 0.36739046797843666, "learning_rate": 1.6633980135612166e-05, "loss": 0.2166, "step": 3646 }, { "epoch": 0.29113115670152473, "grad_norm": 0.33989092106004803, "learning_rate": 1.663204530011929e-05, "loss": 0.1648, "step": 3647 }, { "epoch": 0.29121098427396824, "grad_norm": 0.4297515658359507, "learning_rate": 1.6630110021300428e-05, "loss": 0.1789, "step": 3648 }, { "epoch": 0.29129081184641176, "grad_norm": 0.3118389271952289, "learning_rate": 1.662817429928495e-05, "loss": 0.1681, "step": 3649 }, { "epoch": 0.29137063941885527, "grad_norm": 0.3023666055657109, "learning_rate": 1.6626238134202254e-05, "loss": 0.192, "step": 3650 }, { "epoch": 0.2914504669912988, "grad_norm": 0.3236343945709547, "learning_rate": 1.662430152618176e-05, "loss": 0.1692, "step": 3651 }, { "epoch": 0.2915302945637423, "grad_norm": 0.3414841510643125, "learning_rate": 1.6622364475352925e-05, "loss": 0.1717, "step": 3652 }, { "epoch": 0.2916101221361858, "grad_norm": 0.36211920923881025, "learning_rate": 1.6620426981845233e-05, "loss": 0.2096, "step": 3653 }, { "epoch": 0.2916899497086294, "grad_norm": 0.3307449466355665, "learning_rate": 1.6618489045788196e-05, "loss": 0.1758, "step": 3654 }, { "epoch": 0.2917697772810729, "grad_norm": 0.37999554952558057, "learning_rate": 1.6616550667311356e-05, "loss": 0.1946, "step": 3655 }, { "epoch": 0.2918496048535164, "grad_norm": 0.3274184462099716, "learning_rate": 1.661461184654429e-05, "loss": 0.2355, "step": 3656 }, { "epoch": 0.2919294324259599, "grad_norm": 0.3216032447530247, "learning_rate": 1.66126725836166e-05, "loss": 0.194, "step": 3657 }, { "epoch": 0.29200925999840344, "grad_norm": 0.33798560249642057, "learning_rate": 1.661073287865792e-05, "loss": 0.1817, "step": 3658 }, { "epoch": 0.29208908757084695, "grad_norm": 0.41397265741016276, "learning_rate": 1.66087927317979e-05, "loss": 0.1937, "step": 3659 }, { "epoch": 0.2921689151432905, "grad_norm": 0.342630632560165, "learning_rate": 1.6606852143166245e-05, "loss": 0.1869, "step": 3660 }, { "epoch": 0.29224874271573403, "grad_norm": 0.3623992929090946, "learning_rate": 1.660491111289267e-05, "loss": 0.1291, "step": 3661 }, { "epoch": 0.29232857028817755, "grad_norm": 0.29118585068412617, "learning_rate": 1.6602969641106924e-05, "loss": 0.169, "step": 3662 }, { "epoch": 0.29240839786062106, "grad_norm": 0.35059109486881485, "learning_rate": 1.660102772793879e-05, "loss": 0.2017, "step": 3663 }, { "epoch": 0.2924882254330646, "grad_norm": 0.31462584259831844, "learning_rate": 1.6599085373518074e-05, "loss": 0.201, "step": 3664 }, { "epoch": 0.2925680530055081, "grad_norm": 0.3335121912042592, "learning_rate": 1.6597142577974618e-05, "loss": 0.1623, "step": 3665 }, { "epoch": 0.2926478805779516, "grad_norm": 0.3752329360607811, "learning_rate": 1.6595199341438287e-05, "loss": 0.2749, "step": 3666 }, { "epoch": 0.29272770815039517, "grad_norm": 0.3677313854214379, "learning_rate": 1.659325566403898e-05, "loss": 0.2033, "step": 3667 }, { "epoch": 0.2928075357228387, "grad_norm": 0.34454150026329033, "learning_rate": 1.6591311545906624e-05, "loss": 0.1679, "step": 3668 }, { "epoch": 0.2928873632952822, "grad_norm": 0.31967974241947145, "learning_rate": 1.6589366987171178e-05, "loss": 0.1974, "step": 3669 }, { "epoch": 0.2929671908677257, "grad_norm": 0.2857784729848602, "learning_rate": 1.6587421987962625e-05, "loss": 0.1991, "step": 3670 }, { "epoch": 0.2930470184401692, "grad_norm": 0.3507899173924113, "learning_rate": 1.658547654841098e-05, "loss": 0.1793, "step": 3671 }, { "epoch": 0.29312684601261274, "grad_norm": 0.3673207392323793, "learning_rate": 1.658353066864629e-05, "loss": 0.17, "step": 3672 }, { "epoch": 0.29320667358505625, "grad_norm": 0.3663719668821383, "learning_rate": 1.6581584348798627e-05, "loss": 0.2217, "step": 3673 }, { "epoch": 0.2932865011574998, "grad_norm": 0.35292076143460294, "learning_rate": 1.65796375889981e-05, "loss": 0.1728, "step": 3674 }, { "epoch": 0.29336632872994334, "grad_norm": 0.4247936943428015, "learning_rate": 1.6577690389374834e-05, "loss": 0.1394, "step": 3675 }, { "epoch": 0.29344615630238685, "grad_norm": 0.43774350387899347, "learning_rate": 1.6575742750058997e-05, "loss": 0.1645, "step": 3676 }, { "epoch": 0.29352598387483037, "grad_norm": 0.3128392194918362, "learning_rate": 1.6573794671180783e-05, "loss": 0.1781, "step": 3677 }, { "epoch": 0.2936058114472739, "grad_norm": 0.37635331042705766, "learning_rate": 1.657184615287041e-05, "loss": 0.2331, "step": 3678 }, { "epoch": 0.2936856390197174, "grad_norm": 0.36171749041631446, "learning_rate": 1.6569897195258127e-05, "loss": 0.1673, "step": 3679 }, { "epoch": 0.2937654665921609, "grad_norm": 0.32011587722729845, "learning_rate": 1.656794779847422e-05, "loss": 0.204, "step": 3680 }, { "epoch": 0.2938452941646045, "grad_norm": 0.31335160902999093, "learning_rate": 1.656599796264899e-05, "loss": 0.223, "step": 3681 }, { "epoch": 0.293925121737048, "grad_norm": 0.42045029729092276, "learning_rate": 1.6564047687912785e-05, "loss": 0.1949, "step": 3682 }, { "epoch": 0.2940049493094915, "grad_norm": 0.3259353958486087, "learning_rate": 1.6562096974395966e-05, "loss": 0.2227, "step": 3683 }, { "epoch": 0.294084776881935, "grad_norm": 0.326275562999215, "learning_rate": 1.6560145822228933e-05, "loss": 0.2004, "step": 3684 }, { "epoch": 0.29416460445437853, "grad_norm": 0.2981882631055185, "learning_rate": 1.6558194231542115e-05, "loss": 0.1821, "step": 3685 }, { "epoch": 0.29424443202682204, "grad_norm": 0.3646249243424146, "learning_rate": 1.6556242202465968e-05, "loss": 0.1834, "step": 3686 }, { "epoch": 0.29432425959926556, "grad_norm": 0.2953642435295485, "learning_rate": 1.6554289735130972e-05, "loss": 0.2171, "step": 3687 }, { "epoch": 0.29440408717170913, "grad_norm": 0.28092707409095846, "learning_rate": 1.6552336829667646e-05, "loss": 0.2097, "step": 3688 }, { "epoch": 0.29448391474415264, "grad_norm": 0.32466250344401837, "learning_rate": 1.655038348620653e-05, "loss": 0.2447, "step": 3689 }, { "epoch": 0.29456374231659616, "grad_norm": 0.37770634286894633, "learning_rate": 1.6548429704878208e-05, "loss": 0.13, "step": 3690 }, { "epoch": 0.29464356988903967, "grad_norm": 0.39369021329548193, "learning_rate": 1.654647548581327e-05, "loss": 0.206, "step": 3691 }, { "epoch": 0.2947233974614832, "grad_norm": 0.3739204311657686, "learning_rate": 1.6544520829142353e-05, "loss": 0.2312, "step": 3692 }, { "epoch": 0.2948032250339267, "grad_norm": 0.32272213193463023, "learning_rate": 1.654256573499612e-05, "loss": 0.2087, "step": 3693 }, { "epoch": 0.29488305260637027, "grad_norm": 0.38446340478054075, "learning_rate": 1.654061020350526e-05, "loss": 0.2044, "step": 3694 }, { "epoch": 0.2949628801788138, "grad_norm": 0.349277741001477, "learning_rate": 1.653865423480049e-05, "loss": 0.2011, "step": 3695 }, { "epoch": 0.2950427077512573, "grad_norm": 0.3223438428049074, "learning_rate": 1.653669782901256e-05, "loss": 0.1942, "step": 3696 }, { "epoch": 0.2951225353237008, "grad_norm": 0.3541744287349154, "learning_rate": 1.6534740986272247e-05, "loss": 0.1873, "step": 3697 }, { "epoch": 0.2952023628961443, "grad_norm": 0.5196598065069451, "learning_rate": 1.6532783706710364e-05, "loss": 0.2183, "step": 3698 }, { "epoch": 0.29528219046858784, "grad_norm": 0.32028505694019677, "learning_rate": 1.6530825990457742e-05, "loss": 0.2071, "step": 3699 }, { "epoch": 0.29536201804103135, "grad_norm": 0.2688415761728978, "learning_rate": 1.6528867837645247e-05, "loss": 0.2017, "step": 3700 }, { "epoch": 0.2954418456134749, "grad_norm": 0.32848091758579406, "learning_rate": 1.6526909248403776e-05, "loss": 0.2152, "step": 3701 }, { "epoch": 0.29552167318591843, "grad_norm": 0.3917194597159245, "learning_rate": 1.652495022286425e-05, "loss": 0.1768, "step": 3702 }, { "epoch": 0.29560150075836195, "grad_norm": 0.31074069369051777, "learning_rate": 1.6522990761157623e-05, "loss": 0.1999, "step": 3703 }, { "epoch": 0.29568132833080546, "grad_norm": 0.334465546602173, "learning_rate": 1.6521030863414878e-05, "loss": 0.2341, "step": 3704 }, { "epoch": 0.295761155903249, "grad_norm": 0.3923671365445714, "learning_rate": 1.6519070529767027e-05, "loss": 0.1891, "step": 3705 }, { "epoch": 0.2958409834756925, "grad_norm": 0.3731684146480883, "learning_rate": 1.651710976034511e-05, "loss": 0.2408, "step": 3706 }, { "epoch": 0.295920811048136, "grad_norm": 0.3356643343011407, "learning_rate": 1.65151485552802e-05, "loss": 0.2557, "step": 3707 }, { "epoch": 0.29600063862057957, "grad_norm": 0.3237327958524489, "learning_rate": 1.6513186914703387e-05, "loss": 0.1853, "step": 3708 }, { "epoch": 0.2960804661930231, "grad_norm": 0.2837852985813288, "learning_rate": 1.6511224838745805e-05, "loss": 0.2044, "step": 3709 }, { "epoch": 0.2961602937654666, "grad_norm": 0.32866631526470896, "learning_rate": 1.6509262327538612e-05, "loss": 0.1927, "step": 3710 }, { "epoch": 0.2962401213379101, "grad_norm": 0.37153030593340347, "learning_rate": 1.650729938121299e-05, "loss": 0.1739, "step": 3711 }, { "epoch": 0.2963199489103536, "grad_norm": 0.3168285564221755, "learning_rate": 1.650533599990016e-05, "loss": 0.1906, "step": 3712 }, { "epoch": 0.29639977648279714, "grad_norm": 0.3118973206134328, "learning_rate": 1.6503372183731362e-05, "loss": 0.2069, "step": 3713 }, { "epoch": 0.29647960405524065, "grad_norm": 0.4509620847640773, "learning_rate": 1.6501407932837866e-05, "loss": 0.1526, "step": 3714 }, { "epoch": 0.2965594316276842, "grad_norm": 0.35614955540685084, "learning_rate": 1.6499443247350982e-05, "loss": 0.1638, "step": 3715 }, { "epoch": 0.29663925920012774, "grad_norm": 0.30569131662912297, "learning_rate": 1.6497478127402035e-05, "loss": 0.175, "step": 3716 }, { "epoch": 0.29671908677257125, "grad_norm": 0.35880145418116255, "learning_rate": 1.649551257312239e-05, "loss": 0.2117, "step": 3717 }, { "epoch": 0.29679891434501476, "grad_norm": 0.4688032607645247, "learning_rate": 1.6493546584643438e-05, "loss": 0.1661, "step": 3718 }, { "epoch": 0.2968787419174583, "grad_norm": 0.2971988495895108, "learning_rate": 1.6491580162096592e-05, "loss": 0.1928, "step": 3719 }, { "epoch": 0.2969585694899018, "grad_norm": 0.3323182299626282, "learning_rate": 1.64896133056133e-05, "loss": 0.2314, "step": 3720 }, { "epoch": 0.29703839706234536, "grad_norm": 0.5080373716374077, "learning_rate": 1.6487646015325042e-05, "loss": 0.1721, "step": 3721 }, { "epoch": 0.2971182246347889, "grad_norm": 0.3627103265480647, "learning_rate": 1.6485678291363325e-05, "loss": 0.1956, "step": 3722 }, { "epoch": 0.2971980522072324, "grad_norm": 0.36671117705223694, "learning_rate": 1.648371013385968e-05, "loss": 0.2081, "step": 3723 }, { "epoch": 0.2972778797796759, "grad_norm": 0.32399442629940234, "learning_rate": 1.6481741542945667e-05, "loss": 0.1803, "step": 3724 }, { "epoch": 0.2973577073521194, "grad_norm": 0.3431542111363782, "learning_rate": 1.647977251875289e-05, "loss": 0.1899, "step": 3725 }, { "epoch": 0.29743753492456293, "grad_norm": 0.36600040340152656, "learning_rate": 1.6477803061412957e-05, "loss": 0.1725, "step": 3726 }, { "epoch": 0.29751736249700644, "grad_norm": 0.30014453588104145, "learning_rate": 1.647583317105753e-05, "loss": 0.195, "step": 3727 }, { "epoch": 0.29759719006945, "grad_norm": 0.3805050529987311, "learning_rate": 1.647386284781828e-05, "loss": 0.1585, "step": 3728 }, { "epoch": 0.2976770176418935, "grad_norm": 0.3798129969015873, "learning_rate": 1.647189209182692e-05, "loss": 0.1473, "step": 3729 }, { "epoch": 0.29775684521433704, "grad_norm": 0.327516245548906, "learning_rate": 1.646992090321518e-05, "loss": 0.2463, "step": 3730 }, { "epoch": 0.29783667278678055, "grad_norm": 0.3202875249017135, "learning_rate": 1.6467949282114836e-05, "loss": 0.1771, "step": 3731 }, { "epoch": 0.29791650035922407, "grad_norm": 0.31334603060020627, "learning_rate": 1.6465977228657677e-05, "loss": 0.1674, "step": 3732 }, { "epoch": 0.2979963279316676, "grad_norm": 0.28383817638918085, "learning_rate": 1.6464004742975534e-05, "loss": 0.205, "step": 3733 }, { "epoch": 0.2980761555041111, "grad_norm": 0.2881790250753482, "learning_rate": 1.6462031825200254e-05, "loss": 0.1622, "step": 3734 }, { "epoch": 0.29815598307655466, "grad_norm": 0.33284044945705993, "learning_rate": 1.6460058475463717e-05, "loss": 0.1859, "step": 3735 }, { "epoch": 0.2982358106489982, "grad_norm": 0.2947898061393275, "learning_rate": 1.6458084693897834e-05, "loss": 0.1758, "step": 3736 }, { "epoch": 0.2983156382214417, "grad_norm": 0.30058657104464065, "learning_rate": 1.645611048063455e-05, "loss": 0.1635, "step": 3737 }, { "epoch": 0.2983954657938852, "grad_norm": 0.36952239132554865, "learning_rate": 1.645413583580583e-05, "loss": 0.1248, "step": 3738 }, { "epoch": 0.2984752933663287, "grad_norm": 0.38372012616333157, "learning_rate": 1.645216075954367e-05, "loss": 0.1747, "step": 3739 }, { "epoch": 0.29855512093877223, "grad_norm": 0.2959345219657866, "learning_rate": 1.6450185251980094e-05, "loss": 0.1718, "step": 3740 }, { "epoch": 0.29863494851121575, "grad_norm": 0.30261339028497175, "learning_rate": 1.6448209313247164e-05, "loss": 0.2145, "step": 3741 }, { "epoch": 0.2987147760836593, "grad_norm": 0.32347972638085554, "learning_rate": 1.6446232943476958e-05, "loss": 0.1998, "step": 3742 }, { "epoch": 0.29879460365610283, "grad_norm": 0.344724275636595, "learning_rate": 1.644425614280159e-05, "loss": 0.1934, "step": 3743 }, { "epoch": 0.29887443122854634, "grad_norm": 0.29848415588348287, "learning_rate": 1.6442278911353204e-05, "loss": 0.1983, "step": 3744 }, { "epoch": 0.29895425880098986, "grad_norm": 0.3301900721138887, "learning_rate": 1.6440301249263963e-05, "loss": 0.1866, "step": 3745 }, { "epoch": 0.29903408637343337, "grad_norm": 0.33348663687967756, "learning_rate": 1.6438323156666073e-05, "loss": 0.2174, "step": 3746 }, { "epoch": 0.2991139139458769, "grad_norm": 0.3546943582665127, "learning_rate": 1.6436344633691763e-05, "loss": 0.1681, "step": 3747 }, { "epoch": 0.29919374151832046, "grad_norm": 0.32018692457317627, "learning_rate": 1.6434365680473284e-05, "loss": 0.1719, "step": 3748 }, { "epoch": 0.29927356909076397, "grad_norm": 0.3286294735899267, "learning_rate": 1.6432386297142916e-05, "loss": 0.1951, "step": 3749 }, { "epoch": 0.2993533966632075, "grad_norm": 0.3014585969242038, "learning_rate": 1.643040648383299e-05, "loss": 0.1624, "step": 3750 }, { "epoch": 0.299433224235651, "grad_norm": 0.3171042433905791, "learning_rate": 1.642842624067583e-05, "loss": 0.2257, "step": 3751 }, { "epoch": 0.2995130518080945, "grad_norm": 0.29140877057400755, "learning_rate": 1.6426445567803815e-05, "loss": 0.2315, "step": 3752 }, { "epoch": 0.299592879380538, "grad_norm": 0.2800631987048486, "learning_rate": 1.6424464465349354e-05, "loss": 0.158, "step": 3753 }, { "epoch": 0.29967270695298154, "grad_norm": 0.3289294376755506, "learning_rate": 1.642248293344486e-05, "loss": 0.1478, "step": 3754 }, { "epoch": 0.2997525345254251, "grad_norm": 0.33830170660693915, "learning_rate": 1.6420500972222804e-05, "loss": 0.2184, "step": 3755 }, { "epoch": 0.2998323620978686, "grad_norm": 0.34402328430960866, "learning_rate": 1.6418518581815667e-05, "loss": 0.1508, "step": 3756 }, { "epoch": 0.29991218967031213, "grad_norm": 0.3390225213517971, "learning_rate": 1.641653576235596e-05, "loss": 0.2051, "step": 3757 }, { "epoch": 0.29999201724275565, "grad_norm": 0.33845347277160714, "learning_rate": 1.641455251397623e-05, "loss": 0.2043, "step": 3758 }, { "epoch": 0.30007184481519916, "grad_norm": 0.43490556609284997, "learning_rate": 1.641256883680905e-05, "loss": 0.1932, "step": 3759 }, { "epoch": 0.3001516723876427, "grad_norm": 0.3018125608838068, "learning_rate": 1.6410584730987022e-05, "loss": 0.1814, "step": 3760 }, { "epoch": 0.3002314999600862, "grad_norm": 0.3242080364052167, "learning_rate": 1.6408600196642775e-05, "loss": 0.1709, "step": 3761 }, { "epoch": 0.30031132753252976, "grad_norm": 0.40360342233173063, "learning_rate": 1.6406615233908966e-05, "loss": 0.2319, "step": 3762 }, { "epoch": 0.3003911551049733, "grad_norm": 0.355511115093494, "learning_rate": 1.640462984291828e-05, "loss": 0.2183, "step": 3763 }, { "epoch": 0.3004709826774168, "grad_norm": 0.32421871335581026, "learning_rate": 1.6402644023803437e-05, "loss": 0.2211, "step": 3764 }, { "epoch": 0.3005508102498603, "grad_norm": 0.274370597461679, "learning_rate": 1.6400657776697177e-05, "loss": 0.208, "step": 3765 }, { "epoch": 0.3006306378223038, "grad_norm": 0.3223760254138711, "learning_rate": 1.6398671101732276e-05, "loss": 0.1593, "step": 3766 }, { "epoch": 0.30071046539474733, "grad_norm": 0.2931070929399399, "learning_rate": 1.6396683999041533e-05, "loss": 0.1901, "step": 3767 }, { "epoch": 0.30079029296719084, "grad_norm": 0.32334140468982275, "learning_rate": 1.639469646875778e-05, "loss": 0.1786, "step": 3768 }, { "epoch": 0.3008701205396344, "grad_norm": 0.2959880894066898, "learning_rate": 1.6392708511013874e-05, "loss": 0.1971, "step": 3769 }, { "epoch": 0.3009499481120779, "grad_norm": 0.299691874196475, "learning_rate": 1.6390720125942707e-05, "loss": 0.1683, "step": 3770 }, { "epoch": 0.30102977568452144, "grad_norm": 0.3390866096379741, "learning_rate": 1.6388731313677184e-05, "loss": 0.1771, "step": 3771 }, { "epoch": 0.30110960325696495, "grad_norm": 0.35654494464161496, "learning_rate": 1.6386742074350257e-05, "loss": 0.2221, "step": 3772 }, { "epoch": 0.30118943082940847, "grad_norm": 0.29710756612385947, "learning_rate": 1.63847524080949e-05, "loss": 0.1632, "step": 3773 }, { "epoch": 0.301269258401852, "grad_norm": 0.28908665484455753, "learning_rate": 1.638276231504411e-05, "loss": 0.2225, "step": 3774 }, { "epoch": 0.3013490859742955, "grad_norm": 0.29901970469638656, "learning_rate": 1.6380771795330915e-05, "loss": 0.1889, "step": 3775 }, { "epoch": 0.30142891354673906, "grad_norm": 0.3103146095567871, "learning_rate": 1.6378780849088386e-05, "loss": 0.1778, "step": 3776 }, { "epoch": 0.3015087411191826, "grad_norm": 0.3050173231645082, "learning_rate": 1.6376789476449594e-05, "loss": 0.1643, "step": 3777 }, { "epoch": 0.3015885686916261, "grad_norm": 0.36145352732974034, "learning_rate": 1.637479767754766e-05, "loss": 0.2501, "step": 3778 }, { "epoch": 0.3016683962640696, "grad_norm": 0.3045783864850851, "learning_rate": 1.6372805452515735e-05, "loss": 0.1829, "step": 3779 }, { "epoch": 0.3017482238365131, "grad_norm": 0.2839957844421412, "learning_rate": 1.6370812801486978e-05, "loss": 0.186, "step": 3780 }, { "epoch": 0.30182805140895663, "grad_norm": 0.2946803338854403, "learning_rate": 1.6368819724594606e-05, "loss": 0.1982, "step": 3781 }, { "epoch": 0.3019078789814002, "grad_norm": 0.3347667080693567, "learning_rate": 1.6366826221971833e-05, "loss": 0.1671, "step": 3782 }, { "epoch": 0.3019877065538437, "grad_norm": 0.33463867191289387, "learning_rate": 1.6364832293751924e-05, "loss": 0.1749, "step": 3783 }, { "epoch": 0.30206753412628723, "grad_norm": 0.2918624345028922, "learning_rate": 1.6362837940068166e-05, "loss": 0.143, "step": 3784 }, { "epoch": 0.30214736169873074, "grad_norm": 0.34289839909704906, "learning_rate": 1.636084316105387e-05, "loss": 0.1991, "step": 3785 }, { "epoch": 0.30222718927117426, "grad_norm": 0.30531756583994074, "learning_rate": 1.6358847956842385e-05, "loss": 0.185, "step": 3786 }, { "epoch": 0.30230701684361777, "grad_norm": 0.3215529132044395, "learning_rate": 1.6356852327567073e-05, "loss": 0.1711, "step": 3787 }, { "epoch": 0.3023868444160613, "grad_norm": 0.32408704235654856, "learning_rate": 1.6354856273361345e-05, "loss": 0.1814, "step": 3788 }, { "epoch": 0.30246667198850485, "grad_norm": 0.2986133712463853, "learning_rate": 1.6352859794358623e-05, "loss": 0.201, "step": 3789 }, { "epoch": 0.30254649956094837, "grad_norm": 0.3150579343404781, "learning_rate": 1.635086289069236e-05, "loss": 0.19, "step": 3790 }, { "epoch": 0.3026263271333919, "grad_norm": 0.29769768454237266, "learning_rate": 1.6348865562496054e-05, "loss": 0.2251, "step": 3791 }, { "epoch": 0.3027061547058354, "grad_norm": 0.34802447335755976, "learning_rate": 1.6346867809903208e-05, "loss": 0.1856, "step": 3792 }, { "epoch": 0.3027859822782789, "grad_norm": 0.3346393452797499, "learning_rate": 1.634486963304736e-05, "loss": 0.2207, "step": 3793 }, { "epoch": 0.3028658098507224, "grad_norm": 0.3020304261706238, "learning_rate": 1.634287103206209e-05, "loss": 0.1882, "step": 3794 }, { "epoch": 0.30294563742316594, "grad_norm": 0.3067491015762876, "learning_rate": 1.6340872007080994e-05, "loss": 0.2523, "step": 3795 }, { "epoch": 0.3030254649956095, "grad_norm": 0.3169187577794184, "learning_rate": 1.6338872558237698e-05, "loss": 0.1698, "step": 3796 }, { "epoch": 0.303105292568053, "grad_norm": 0.35550180582629115, "learning_rate": 1.6336872685665855e-05, "loss": 0.1524, "step": 3797 }, { "epoch": 0.30318512014049653, "grad_norm": 0.3231453404067973, "learning_rate": 1.633487238949915e-05, "loss": 0.1985, "step": 3798 }, { "epoch": 0.30326494771294005, "grad_norm": 0.3020159470066681, "learning_rate": 1.6332871669871298e-05, "loss": 0.1578, "step": 3799 }, { "epoch": 0.30334477528538356, "grad_norm": 0.3189101803807513, "learning_rate": 1.633087052691604e-05, "loss": 0.1214, "step": 3800 }, { "epoch": 0.3034246028578271, "grad_norm": 0.26225114362570195, "learning_rate": 1.6328868960767135e-05, "loss": 0.188, "step": 3801 }, { "epoch": 0.3035044304302706, "grad_norm": 0.2874665860702369, "learning_rate": 1.632686697155839e-05, "loss": 0.1918, "step": 3802 }, { "epoch": 0.30358425800271416, "grad_norm": 0.3214862708513687, "learning_rate": 1.6324864559423623e-05, "loss": 0.1937, "step": 3803 }, { "epoch": 0.30366408557515767, "grad_norm": 0.28775648380887353, "learning_rate": 1.632286172449669e-05, "loss": 0.1768, "step": 3804 }, { "epoch": 0.3037439131476012, "grad_norm": 0.3079240673174174, "learning_rate": 1.632085846691148e-05, "loss": 0.211, "step": 3805 }, { "epoch": 0.3038237407200447, "grad_norm": 0.39505770163642473, "learning_rate": 1.631885478680189e-05, "loss": 0.2143, "step": 3806 }, { "epoch": 0.3039035682924882, "grad_norm": 0.3199919822276774, "learning_rate": 1.631685068430186e-05, "loss": 0.164, "step": 3807 }, { "epoch": 0.3039833958649317, "grad_norm": 0.31279426590307885, "learning_rate": 1.631484615954537e-05, "loss": 0.1619, "step": 3808 }, { "epoch": 0.3040632234373753, "grad_norm": 0.2959039753170635, "learning_rate": 1.6312841212666398e-05, "loss": 0.1913, "step": 3809 }, { "epoch": 0.3041430510098188, "grad_norm": 0.3166175730117494, "learning_rate": 1.6310835843798974e-05, "loss": 0.1622, "step": 3810 }, { "epoch": 0.3042228785822623, "grad_norm": 0.30491727189536144, "learning_rate": 1.630883005307715e-05, "loss": 0.1657, "step": 3811 }, { "epoch": 0.30430270615470584, "grad_norm": 0.3267424270073485, "learning_rate": 1.6306823840635e-05, "loss": 0.1961, "step": 3812 }, { "epoch": 0.30438253372714935, "grad_norm": 0.3149660234846573, "learning_rate": 1.6304817206606637e-05, "loss": 0.1893, "step": 3813 }, { "epoch": 0.30446236129959287, "grad_norm": 0.3782638467242809, "learning_rate": 1.63028101511262e-05, "loss": 0.2189, "step": 3814 }, { "epoch": 0.3045421888720364, "grad_norm": 0.25950994835644875, "learning_rate": 1.630080267432784e-05, "loss": 0.2437, "step": 3815 }, { "epoch": 0.30462201644447995, "grad_norm": 0.2988203185023824, "learning_rate": 1.629879477634576e-05, "loss": 0.194, "step": 3816 }, { "epoch": 0.30470184401692346, "grad_norm": 0.35724421453409594, "learning_rate": 1.6296786457314177e-05, "loss": 0.1748, "step": 3817 }, { "epoch": 0.304781671589367, "grad_norm": 0.3564600251202394, "learning_rate": 1.6294777717367333e-05, "loss": 0.2289, "step": 3818 }, { "epoch": 0.3048614991618105, "grad_norm": 0.3885317311028571, "learning_rate": 1.6292768556639512e-05, "loss": 0.2055, "step": 3819 }, { "epoch": 0.304941326734254, "grad_norm": 0.32532119609723215, "learning_rate": 1.629075897526502e-05, "loss": 0.1945, "step": 3820 }, { "epoch": 0.3050211543066975, "grad_norm": 0.3897730230232775, "learning_rate": 1.628874897337818e-05, "loss": 0.1535, "step": 3821 }, { "epoch": 0.30510098187914103, "grad_norm": 0.2778550362551869, "learning_rate": 1.6286738551113366e-05, "loss": 0.251, "step": 3822 }, { "epoch": 0.3051808094515846, "grad_norm": 0.3156097216322441, "learning_rate": 1.6284727708604955e-05, "loss": 0.1423, "step": 3823 }, { "epoch": 0.3052606370240281, "grad_norm": 0.26982360768586916, "learning_rate": 1.6282716445987366e-05, "loss": 0.1992, "step": 3824 }, { "epoch": 0.30534046459647163, "grad_norm": 0.3903551283762406, "learning_rate": 1.6280704763395046e-05, "loss": 0.2063, "step": 3825 }, { "epoch": 0.30542029216891514, "grad_norm": 0.3006579714394263, "learning_rate": 1.627869266096247e-05, "loss": 0.2525, "step": 3826 }, { "epoch": 0.30550011974135866, "grad_norm": 0.2888944902026554, "learning_rate": 1.6276680138824138e-05, "loss": 0.1893, "step": 3827 }, { "epoch": 0.30557994731380217, "grad_norm": 0.31918703315407776, "learning_rate": 1.6274667197114577e-05, "loss": 0.175, "step": 3828 }, { "epoch": 0.3056597748862457, "grad_norm": 0.2866602785552964, "learning_rate": 1.6272653835968345e-05, "loss": 0.2162, "step": 3829 }, { "epoch": 0.30573960245868925, "grad_norm": 0.3349926378519994, "learning_rate": 1.6270640055520027e-05, "loss": 0.207, "step": 3830 }, { "epoch": 0.30581943003113277, "grad_norm": 0.2839416346808323, "learning_rate": 1.626862585590424e-05, "loss": 0.1648, "step": 3831 }, { "epoch": 0.3058992576035763, "grad_norm": 0.3124436876528448, "learning_rate": 1.6266611237255617e-05, "loss": 0.2337, "step": 3832 }, { "epoch": 0.3059790851760198, "grad_norm": 0.3035443074306337, "learning_rate": 1.6264596199708837e-05, "loss": 0.1658, "step": 3833 }, { "epoch": 0.3060589127484633, "grad_norm": 0.36861954401607855, "learning_rate": 1.626258074339859e-05, "loss": 0.1746, "step": 3834 }, { "epoch": 0.3061387403209068, "grad_norm": 0.2894505099438143, "learning_rate": 1.6260564868459604e-05, "loss": 0.1688, "step": 3835 }, { "epoch": 0.3062185678933504, "grad_norm": 0.2658015341062763, "learning_rate": 1.625854857502663e-05, "loss": 0.1923, "step": 3836 }, { "epoch": 0.3062983954657939, "grad_norm": 0.2903133246357379, "learning_rate": 1.625653186323446e-05, "loss": 0.1957, "step": 3837 }, { "epoch": 0.3063782230382374, "grad_norm": 0.3184979407706405, "learning_rate": 1.625451473321789e-05, "loss": 0.2062, "step": 3838 }, { "epoch": 0.30645805061068093, "grad_norm": 0.277759483576123, "learning_rate": 1.625249718511176e-05, "loss": 0.1881, "step": 3839 }, { "epoch": 0.30653787818312445, "grad_norm": 0.28899302242703356, "learning_rate": 1.6250479219050935e-05, "loss": 0.234, "step": 3840 }, { "epoch": 0.30661770575556796, "grad_norm": 0.3501222279705946, "learning_rate": 1.6248460835170314e-05, "loss": 0.1506, "step": 3841 }, { "epoch": 0.3066975333280115, "grad_norm": 0.28816118684843367, "learning_rate": 1.624644203360481e-05, "loss": 0.197, "step": 3842 }, { "epoch": 0.30677736090045504, "grad_norm": 0.40273296299164946, "learning_rate": 1.6244422814489382e-05, "loss": 0.1689, "step": 3843 }, { "epoch": 0.30685718847289856, "grad_norm": 0.3385136313668694, "learning_rate": 1.6242403177959e-05, "loss": 0.231, "step": 3844 }, { "epoch": 0.30693701604534207, "grad_norm": 0.2722223213244881, "learning_rate": 1.6240383124148665e-05, "loss": 0.1884, "step": 3845 }, { "epoch": 0.3070168436177856, "grad_norm": 0.2792666269619822, "learning_rate": 1.6238362653193414e-05, "loss": 0.1819, "step": 3846 }, { "epoch": 0.3070966711902291, "grad_norm": 0.3365404429151752, "learning_rate": 1.6236341765228312e-05, "loss": 0.1799, "step": 3847 }, { "epoch": 0.3071764987626726, "grad_norm": 0.32685569673932185, "learning_rate": 1.6234320460388437e-05, "loss": 0.1634, "step": 3848 }, { "epoch": 0.3072563263351161, "grad_norm": 0.31789747820351566, "learning_rate": 1.6232298738808915e-05, "loss": 0.2025, "step": 3849 }, { "epoch": 0.3073361539075597, "grad_norm": 0.3306858513546602, "learning_rate": 1.6230276600624887e-05, "loss": 0.178, "step": 3850 }, { "epoch": 0.3074159814800032, "grad_norm": 0.30674728857148603, "learning_rate": 1.622825404597152e-05, "loss": 0.2652, "step": 3851 }, { "epoch": 0.3074958090524467, "grad_norm": 0.32184274582039846, "learning_rate": 1.6226231074984024e-05, "loss": 0.1476, "step": 3852 }, { "epoch": 0.30757563662489024, "grad_norm": 0.28794128659648976, "learning_rate": 1.6224207687797616e-05, "loss": 0.1692, "step": 3853 }, { "epoch": 0.30765546419733375, "grad_norm": 0.3334863169206088, "learning_rate": 1.6222183884547554e-05, "loss": 0.2003, "step": 3854 }, { "epoch": 0.30773529176977726, "grad_norm": 0.34992892066852727, "learning_rate": 1.6220159665369126e-05, "loss": 0.1652, "step": 3855 }, { "epoch": 0.3078151193422208, "grad_norm": 0.3136904415000312, "learning_rate": 1.6218135030397643e-05, "loss": 0.2585, "step": 3856 }, { "epoch": 0.30789494691466435, "grad_norm": 0.34720750964478336, "learning_rate": 1.6216109979768435e-05, "loss": 0.1942, "step": 3857 }, { "epoch": 0.30797477448710786, "grad_norm": 0.3299954841787462, "learning_rate": 1.621408451361688e-05, "loss": 0.1667, "step": 3858 }, { "epoch": 0.3080546020595514, "grad_norm": 0.41587554470623717, "learning_rate": 1.6212058632078366e-05, "loss": 0.1771, "step": 3859 }, { "epoch": 0.3081344296319949, "grad_norm": 0.3358149284034331, "learning_rate": 1.6210032335288312e-05, "loss": 0.1974, "step": 3860 }, { "epoch": 0.3082142572044384, "grad_norm": 0.2802791161539051, "learning_rate": 1.6208005623382176e-05, "loss": 0.198, "step": 3861 }, { "epoch": 0.3082940847768819, "grad_norm": 0.29365250158194384, "learning_rate": 1.620597849649543e-05, "loss": 0.1853, "step": 3862 }, { "epoch": 0.30837391234932543, "grad_norm": 0.36579417120975155, "learning_rate": 1.6203950954763584e-05, "loss": 0.1767, "step": 3863 }, { "epoch": 0.308453739921769, "grad_norm": 0.3837599181479856, "learning_rate": 1.6201922998322164e-05, "loss": 0.1976, "step": 3864 }, { "epoch": 0.3085335674942125, "grad_norm": 0.308808873514137, "learning_rate": 1.6199894627306737e-05, "loss": 0.2115, "step": 3865 }, { "epoch": 0.308613395066656, "grad_norm": 0.2729710884684171, "learning_rate": 1.6197865841852887e-05, "loss": 0.1829, "step": 3866 }, { "epoch": 0.30869322263909954, "grad_norm": 0.33729059820199747, "learning_rate": 1.619583664209624e-05, "loss": 0.1686, "step": 3867 }, { "epoch": 0.30877305021154305, "grad_norm": 0.37039149363778723, "learning_rate": 1.6193807028172427e-05, "loss": 0.2354, "step": 3868 }, { "epoch": 0.30885287778398657, "grad_norm": 0.2857758586369671, "learning_rate": 1.619177700021713e-05, "loss": 0.2037, "step": 3869 }, { "epoch": 0.30893270535643014, "grad_norm": 0.30703901395216976, "learning_rate": 1.618974655836604e-05, "loss": 0.1957, "step": 3870 }, { "epoch": 0.30901253292887365, "grad_norm": 0.3240445764167474, "learning_rate": 1.618771570275489e-05, "loss": 0.1761, "step": 3871 }, { "epoch": 0.30909236050131716, "grad_norm": 0.26684714214477767, "learning_rate": 1.6185684433519434e-05, "loss": 0.1952, "step": 3872 }, { "epoch": 0.3091721880737607, "grad_norm": 0.28363355819866126, "learning_rate": 1.618365275079545e-05, "loss": 0.1957, "step": 3873 }, { "epoch": 0.3092520156462042, "grad_norm": 0.3444396822004735, "learning_rate": 1.6181620654718754e-05, "loss": 0.2038, "step": 3874 }, { "epoch": 0.3093318432186477, "grad_norm": 0.34730648987021695, "learning_rate": 1.6179588145425176e-05, "loss": 0.1378, "step": 3875 }, { "epoch": 0.3094116707910912, "grad_norm": 0.37344409423136365, "learning_rate": 1.617755522305059e-05, "loss": 0.1915, "step": 3876 }, { "epoch": 0.3094914983635348, "grad_norm": 0.3626384027171668, "learning_rate": 1.6175521887730883e-05, "loss": 0.1895, "step": 3877 }, { "epoch": 0.3095713259359783, "grad_norm": 0.25757208168172524, "learning_rate": 1.6173488139601978e-05, "loss": 0.1673, "step": 3878 }, { "epoch": 0.3096511535084218, "grad_norm": 0.30657153434269113, "learning_rate": 1.6171453978799823e-05, "loss": 0.1806, "step": 3879 }, { "epoch": 0.30973098108086533, "grad_norm": 0.29681678040417303, "learning_rate": 1.616941940546039e-05, "loss": 0.2064, "step": 3880 }, { "epoch": 0.30981080865330884, "grad_norm": 0.34876025018660456, "learning_rate": 1.6167384419719686e-05, "loss": 0.1754, "step": 3881 }, { "epoch": 0.30989063622575236, "grad_norm": 0.3092894174157512, "learning_rate": 1.616534902171374e-05, "loss": 0.191, "step": 3882 }, { "epoch": 0.30997046379819587, "grad_norm": 0.31321087764864153, "learning_rate": 1.6163313211578612e-05, "loss": 0.1865, "step": 3883 }, { "epoch": 0.31005029137063944, "grad_norm": 0.3777746929130483, "learning_rate": 1.6161276989450385e-05, "loss": 0.1934, "step": 3884 }, { "epoch": 0.31013011894308296, "grad_norm": 0.3120562484344309, "learning_rate": 1.6159240355465175e-05, "loss": 0.2629, "step": 3885 }, { "epoch": 0.31020994651552647, "grad_norm": 0.3547568553654446, "learning_rate": 1.615720330975912e-05, "loss": 0.1618, "step": 3886 }, { "epoch": 0.31028977408797, "grad_norm": 0.2904207444913975, "learning_rate": 1.615516585246839e-05, "loss": 0.2041, "step": 3887 }, { "epoch": 0.3103696016604135, "grad_norm": 0.38858575123972894, "learning_rate": 1.6153127983729184e-05, "loss": 0.169, "step": 3888 }, { "epoch": 0.310449429232857, "grad_norm": 0.33620146864148476, "learning_rate": 1.6151089703677722e-05, "loss": 0.1624, "step": 3889 }, { "epoch": 0.3105292568053005, "grad_norm": 0.3117069362420713, "learning_rate": 1.6149051012450256e-05, "loss": 0.1718, "step": 3890 }, { "epoch": 0.3106090843777441, "grad_norm": 0.3241102162481703, "learning_rate": 1.6147011910183065e-05, "loss": 0.1966, "step": 3891 }, { "epoch": 0.3106889119501876, "grad_norm": 0.3466473393452277, "learning_rate": 1.6144972397012452e-05, "loss": 0.192, "step": 3892 }, { "epoch": 0.3107687395226311, "grad_norm": 0.312191687324976, "learning_rate": 1.6142932473074754e-05, "loss": 0.1883, "step": 3893 }, { "epoch": 0.31084856709507463, "grad_norm": 0.3089384909118146, "learning_rate": 1.6140892138506327e-05, "loss": 0.1915, "step": 3894 }, { "epoch": 0.31092839466751815, "grad_norm": 0.3402794714263412, "learning_rate": 1.613885139344357e-05, "loss": 0.1884, "step": 3895 }, { "epoch": 0.31100822223996166, "grad_norm": 0.3214346734134471, "learning_rate": 1.6136810238022884e-05, "loss": 0.1567, "step": 3896 }, { "epoch": 0.31108804981240523, "grad_norm": 0.31328244460616467, "learning_rate": 1.6134768672380726e-05, "loss": 0.2129, "step": 3897 }, { "epoch": 0.31116787738484875, "grad_norm": 0.36683570014869027, "learning_rate": 1.6132726696653557e-05, "loss": 0.195, "step": 3898 }, { "epoch": 0.31124770495729226, "grad_norm": 0.29180664364724934, "learning_rate": 1.6130684310977875e-05, "loss": 0.1755, "step": 3899 }, { "epoch": 0.3113275325297358, "grad_norm": 0.3501033897325915, "learning_rate": 1.612864151549021e-05, "loss": 0.1955, "step": 3900 }, { "epoch": 0.3114073601021793, "grad_norm": 0.35555411711600154, "learning_rate": 1.6126598310327116e-05, "loss": 0.1596, "step": 3901 }, { "epoch": 0.3114871876746228, "grad_norm": 0.29537501410714323, "learning_rate": 1.6124554695625166e-05, "loss": 0.1505, "step": 3902 }, { "epoch": 0.3115670152470663, "grad_norm": 0.33175799570208997, "learning_rate": 1.6122510671520976e-05, "loss": 0.1568, "step": 3903 }, { "epoch": 0.3116468428195099, "grad_norm": 0.3512830983881282, "learning_rate": 1.6120466238151177e-05, "loss": 0.1401, "step": 3904 }, { "epoch": 0.3117266703919534, "grad_norm": 0.3956238624276395, "learning_rate": 1.6118421395652433e-05, "loss": 0.2288, "step": 3905 }, { "epoch": 0.3118064979643969, "grad_norm": 0.35349093381602853, "learning_rate": 1.6116376144161428e-05, "loss": 0.1951, "step": 3906 }, { "epoch": 0.3118863255368404, "grad_norm": 0.2628557407896042, "learning_rate": 1.6114330483814884e-05, "loss": 0.1889, "step": 3907 }, { "epoch": 0.31196615310928394, "grad_norm": 0.4175598606863147, "learning_rate": 1.6112284414749545e-05, "loss": 0.1602, "step": 3908 }, { "epoch": 0.31204598068172745, "grad_norm": 0.38487508392070446, "learning_rate": 1.611023793710218e-05, "loss": 0.1341, "step": 3909 }, { "epoch": 0.31212580825417097, "grad_norm": 0.2946915419963849, "learning_rate": 1.610819105100959e-05, "loss": 0.1778, "step": 3910 }, { "epoch": 0.31220563582661454, "grad_norm": 0.2791839232850039, "learning_rate": 1.61061437566086e-05, "loss": 0.2002, "step": 3911 }, { "epoch": 0.31228546339905805, "grad_norm": 0.3544813133079716, "learning_rate": 1.6104096054036067e-05, "loss": 0.1704, "step": 3912 }, { "epoch": 0.31236529097150156, "grad_norm": 0.38695941818415586, "learning_rate": 1.610204794342887e-05, "loss": 0.2016, "step": 3913 }, { "epoch": 0.3124451185439451, "grad_norm": 0.3312606533935601, "learning_rate": 1.6099999424923913e-05, "loss": 0.1667, "step": 3914 }, { "epoch": 0.3125249461163886, "grad_norm": 0.3006853163341539, "learning_rate": 1.6097950498658136e-05, "loss": 0.2076, "step": 3915 }, { "epoch": 0.3126047736888321, "grad_norm": 0.2811884983182447, "learning_rate": 1.60959011647685e-05, "loss": 0.2217, "step": 3916 }, { "epoch": 0.3126846012612756, "grad_norm": 0.3095836043328664, "learning_rate": 1.6093851423391998e-05, "loss": 0.1834, "step": 3917 }, { "epoch": 0.3127644288337192, "grad_norm": 0.29426833578121653, "learning_rate": 1.6091801274665643e-05, "loss": 0.1574, "step": 3918 }, { "epoch": 0.3128442564061627, "grad_norm": 0.3207393222154718, "learning_rate": 1.608975071872648e-05, "loss": 0.2095, "step": 3919 }, { "epoch": 0.3129240839786062, "grad_norm": 0.280464731431716, "learning_rate": 1.608769975571158e-05, "loss": 0.1924, "step": 3920 }, { "epoch": 0.31300391155104973, "grad_norm": 0.27177515973417576, "learning_rate": 1.6085648385758044e-05, "loss": 0.2103, "step": 3921 }, { "epoch": 0.31308373912349324, "grad_norm": 0.3184453028949072, "learning_rate": 1.6083596609002996e-05, "loss": 0.1662, "step": 3922 }, { "epoch": 0.31316356669593676, "grad_norm": 0.33824528253248387, "learning_rate": 1.6081544425583592e-05, "loss": 0.1714, "step": 3923 }, { "epoch": 0.3132433942683803, "grad_norm": 0.32311958665302143, "learning_rate": 1.607949183563701e-05, "loss": 0.2311, "step": 3924 }, { "epoch": 0.31332322184082384, "grad_norm": 0.2915158194804249, "learning_rate": 1.6077438839300455e-05, "loss": 0.207, "step": 3925 }, { "epoch": 0.31340304941326735, "grad_norm": 0.3420855551014761, "learning_rate": 1.6075385436711168e-05, "loss": 0.2233, "step": 3926 }, { "epoch": 0.31348287698571087, "grad_norm": 0.29878885054327553, "learning_rate": 1.6073331628006408e-05, "loss": 0.2352, "step": 3927 }, { "epoch": 0.3135627045581544, "grad_norm": 0.42560292914581876, "learning_rate": 1.607127741332346e-05, "loss": 0.1979, "step": 3928 }, { "epoch": 0.3136425321305979, "grad_norm": 0.3053638235001819, "learning_rate": 1.6069222792799646e-05, "loss": 0.1807, "step": 3929 }, { "epoch": 0.3137223597030414, "grad_norm": 0.34671246123508037, "learning_rate": 1.6067167766572306e-05, "loss": 0.1807, "step": 3930 }, { "epoch": 0.313802187275485, "grad_norm": 0.29989880933317675, "learning_rate": 1.606511233477881e-05, "loss": 0.1783, "step": 3931 }, { "epoch": 0.3138820148479285, "grad_norm": 0.3642967721937253, "learning_rate": 1.6063056497556554e-05, "loss": 0.1638, "step": 3932 }, { "epoch": 0.313961842420372, "grad_norm": 0.30520765884923456, "learning_rate": 1.606100025504297e-05, "loss": 0.1946, "step": 3933 }, { "epoch": 0.3140416699928155, "grad_norm": 0.3119591403775658, "learning_rate": 1.6058943607375506e-05, "loss": 0.1864, "step": 3934 }, { "epoch": 0.31412149756525903, "grad_norm": 0.26344285070794254, "learning_rate": 1.605688655469164e-05, "loss": 0.1849, "step": 3935 }, { "epoch": 0.31420132513770255, "grad_norm": 0.32935824331315655, "learning_rate": 1.6054829097128872e-05, "loss": 0.2309, "step": 3936 }, { "epoch": 0.31428115271014606, "grad_norm": 0.31027331923636103, "learning_rate": 1.6052771234824744e-05, "loss": 0.2212, "step": 3937 }, { "epoch": 0.31436098028258963, "grad_norm": 0.31167859189369257, "learning_rate": 1.6050712967916812e-05, "loss": 0.156, "step": 3938 }, { "epoch": 0.31444080785503314, "grad_norm": 0.2820883823647257, "learning_rate": 1.6048654296542662e-05, "loss": 0.2062, "step": 3939 }, { "epoch": 0.31452063542747666, "grad_norm": 0.43102146538569686, "learning_rate": 1.604659522083991e-05, "loss": 0.2199, "step": 3940 }, { "epoch": 0.31460046299992017, "grad_norm": 0.3503793471060505, "learning_rate": 1.6044535740946195e-05, "loss": 0.222, "step": 3941 }, { "epoch": 0.3146802905723637, "grad_norm": 0.33394689689906654, "learning_rate": 1.604247585699919e-05, "loss": 0.2036, "step": 3942 }, { "epoch": 0.3147601181448072, "grad_norm": 0.28459961689694957, "learning_rate": 1.6040415569136586e-05, "loss": 0.1698, "step": 3943 }, { "epoch": 0.3148399457172507, "grad_norm": 0.35953741665296757, "learning_rate": 1.6038354877496104e-05, "loss": 0.1758, "step": 3944 }, { "epoch": 0.3149197732896943, "grad_norm": 0.2868257729465299, "learning_rate": 1.6036293782215494e-05, "loss": 0.2034, "step": 3945 }, { "epoch": 0.3149996008621378, "grad_norm": 0.3404186686274067, "learning_rate": 1.6034232283432532e-05, "loss": 0.1708, "step": 3946 }, { "epoch": 0.3150794284345813, "grad_norm": 0.3926116526857161, "learning_rate": 1.6032170381285027e-05, "loss": 0.1487, "step": 3947 }, { "epoch": 0.3151592560070248, "grad_norm": 0.2475495003773309, "learning_rate": 1.6030108075910797e-05, "loss": 0.1624, "step": 3948 }, { "epoch": 0.31523908357946834, "grad_norm": 0.27755193634691716, "learning_rate": 1.6028045367447713e-05, "loss": 0.1618, "step": 3949 }, { "epoch": 0.31531891115191185, "grad_norm": 0.3253933619655723, "learning_rate": 1.6025982256033648e-05, "loss": 0.2037, "step": 3950 }, { "epoch": 0.31539873872435537, "grad_norm": 0.29247611209350116, "learning_rate": 1.6023918741806515e-05, "loss": 0.2447, "step": 3951 }, { "epoch": 0.31547856629679893, "grad_norm": 0.3176126108810829, "learning_rate": 1.6021854824904256e-05, "loss": 0.1809, "step": 3952 }, { "epoch": 0.31555839386924245, "grad_norm": 0.35212699899216426, "learning_rate": 1.6019790505464832e-05, "loss": 0.1699, "step": 3953 }, { "epoch": 0.31563822144168596, "grad_norm": 0.30854328052364777, "learning_rate": 1.6017725783626235e-05, "loss": 0.1914, "step": 3954 }, { "epoch": 0.3157180490141295, "grad_norm": 0.26520939658508236, "learning_rate": 1.6015660659526484e-05, "loss": 0.1499, "step": 3955 }, { "epoch": 0.315797876586573, "grad_norm": 0.2870944922645638, "learning_rate": 1.6013595133303628e-05, "loss": 0.1697, "step": 3956 }, { "epoch": 0.3158777041590165, "grad_norm": 0.27688836831949243, "learning_rate": 1.601152920509573e-05, "loss": 0.1793, "step": 3957 }, { "epoch": 0.3159575317314601, "grad_norm": 0.28369016295857225, "learning_rate": 1.60094628750409e-05, "loss": 0.1756, "step": 3958 }, { "epoch": 0.3160373593039036, "grad_norm": 0.29888712892619096, "learning_rate": 1.6007396143277257e-05, "loss": 0.1995, "step": 3959 }, { "epoch": 0.3161171868763471, "grad_norm": 0.3398823628452988, "learning_rate": 1.600532900994295e-05, "loss": 0.1963, "step": 3960 }, { "epoch": 0.3161970144487906, "grad_norm": 0.31793571353783223, "learning_rate": 1.6003261475176173e-05, "loss": 0.2463, "step": 3961 }, { "epoch": 0.31627684202123413, "grad_norm": 0.31942304506962704, "learning_rate": 1.6001193539115118e-05, "loss": 0.1979, "step": 3962 }, { "epoch": 0.31635666959367764, "grad_norm": 0.3030818605285829, "learning_rate": 1.5999125201898027e-05, "loss": 0.1613, "step": 3963 }, { "epoch": 0.31643649716612116, "grad_norm": 0.30272810489976915, "learning_rate": 1.5997056463663158e-05, "loss": 0.1878, "step": 3964 }, { "epoch": 0.3165163247385647, "grad_norm": 0.27802654036431407, "learning_rate": 1.5994987324548795e-05, "loss": 0.1525, "step": 3965 }, { "epoch": 0.31659615231100824, "grad_norm": 0.2818291323351125, "learning_rate": 1.5992917784693257e-05, "loss": 0.1918, "step": 3966 }, { "epoch": 0.31667597988345175, "grad_norm": 0.33974671159142505, "learning_rate": 1.5990847844234877e-05, "loss": 0.2095, "step": 3967 }, { "epoch": 0.31675580745589527, "grad_norm": 0.3121548553019233, "learning_rate": 1.598877750331203e-05, "loss": 0.1981, "step": 3968 }, { "epoch": 0.3168356350283388, "grad_norm": 0.3004589837728732, "learning_rate": 1.5986706762063105e-05, "loss": 0.1663, "step": 3969 }, { "epoch": 0.3169154626007823, "grad_norm": 0.2758882796887992, "learning_rate": 1.5984635620626528e-05, "loss": 0.2053, "step": 3970 }, { "epoch": 0.3169952901732258, "grad_norm": 0.2945597114924614, "learning_rate": 1.5982564079140745e-05, "loss": 0.169, "step": 3971 }, { "epoch": 0.3170751177456694, "grad_norm": 0.26946632032116874, "learning_rate": 1.5980492137744222e-05, "loss": 0.1898, "step": 3972 }, { "epoch": 0.3171549453181129, "grad_norm": 0.33666159266679047, "learning_rate": 1.597841979657547e-05, "loss": 0.1706, "step": 3973 }, { "epoch": 0.3172347728905564, "grad_norm": 0.36988578926703375, "learning_rate": 1.5976347055773013e-05, "loss": 0.1712, "step": 3974 }, { "epoch": 0.3173146004629999, "grad_norm": 0.28593284864554214, "learning_rate": 1.5974273915475408e-05, "loss": 0.1607, "step": 3975 }, { "epoch": 0.31739442803544343, "grad_norm": 0.26738400932327844, "learning_rate": 1.5972200375821234e-05, "loss": 0.2019, "step": 3976 }, { "epoch": 0.31747425560788695, "grad_norm": 0.3196055946270032, "learning_rate": 1.59701264369491e-05, "loss": 0.1711, "step": 3977 }, { "epoch": 0.31755408318033046, "grad_norm": 0.29727330008147257, "learning_rate": 1.5968052098997642e-05, "loss": 0.1937, "step": 3978 }, { "epoch": 0.31763391075277403, "grad_norm": 0.28128532544362156, "learning_rate": 1.5965977362105517e-05, "loss": 0.1788, "step": 3979 }, { "epoch": 0.31771373832521754, "grad_norm": 0.2945896689816886, "learning_rate": 1.5963902226411412e-05, "loss": 0.1735, "step": 3980 }, { "epoch": 0.31779356589766106, "grad_norm": 0.27593925948560705, "learning_rate": 1.5961826692054048e-05, "loss": 0.172, "step": 3981 }, { "epoch": 0.31787339347010457, "grad_norm": 0.3920873800042026, "learning_rate": 1.5959750759172164e-05, "loss": 0.1612, "step": 3982 }, { "epoch": 0.3179532210425481, "grad_norm": 0.3892691033703409, "learning_rate": 1.5957674427904525e-05, "loss": 0.1836, "step": 3983 }, { "epoch": 0.3180330486149916, "grad_norm": 0.30816679754596477, "learning_rate": 1.595559769838993e-05, "loss": 0.2003, "step": 3984 }, { "epoch": 0.31811287618743517, "grad_norm": 0.32272409979141237, "learning_rate": 1.5953520570767196e-05, "loss": 0.2235, "step": 3985 }, { "epoch": 0.3181927037598787, "grad_norm": 0.3060121543033469, "learning_rate": 1.5951443045175173e-05, "loss": 0.193, "step": 3986 }, { "epoch": 0.3182725313323222, "grad_norm": 0.3332876863193432, "learning_rate": 1.5949365121752734e-05, "loss": 0.1591, "step": 3987 }, { "epoch": 0.3183523589047657, "grad_norm": 0.3392437476740896, "learning_rate": 1.594728680063878e-05, "loss": 0.1865, "step": 3988 }, { "epoch": 0.3184321864772092, "grad_norm": 0.3651745228778283, "learning_rate": 1.594520808197224e-05, "loss": 0.17, "step": 3989 }, { "epoch": 0.31851201404965274, "grad_norm": 0.3286934004702798, "learning_rate": 1.594312896589207e-05, "loss": 0.1825, "step": 3990 }, { "epoch": 0.31859184162209625, "grad_norm": 0.32895246177697857, "learning_rate": 1.5941049452537245e-05, "loss": 0.2074, "step": 3991 }, { "epoch": 0.3186716691945398, "grad_norm": 0.2790632175923471, "learning_rate": 1.5938969542046777e-05, "loss": 0.1898, "step": 3992 }, { "epoch": 0.31875149676698333, "grad_norm": 0.3246066335863351, "learning_rate": 1.59368892345597e-05, "loss": 0.1502, "step": 3993 }, { "epoch": 0.31883132433942685, "grad_norm": 0.35809862976750095, "learning_rate": 1.5934808530215066e-05, "loss": 0.1937, "step": 3994 }, { "epoch": 0.31891115191187036, "grad_norm": 0.310093509372538, "learning_rate": 1.593272742915197e-05, "loss": 0.1853, "step": 3995 }, { "epoch": 0.3189909794843139, "grad_norm": 0.2862528534613511, "learning_rate": 1.593064593150953e-05, "loss": 0.2455, "step": 3996 }, { "epoch": 0.3190708070567574, "grad_norm": 0.30075006976247637, "learning_rate": 1.5928564037426876e-05, "loss": 0.1575, "step": 3997 }, { "epoch": 0.3191506346292009, "grad_norm": 0.3712557778237777, "learning_rate": 1.5926481747043173e-05, "loss": 0.1953, "step": 3998 }, { "epoch": 0.31923046220164447, "grad_norm": 0.29641603182731985, "learning_rate": 1.5924399060497628e-05, "loss": 0.2288, "step": 3999 }, { "epoch": 0.319310289774088, "grad_norm": 0.3022468186204335, "learning_rate": 1.5922315977929444e-05, "loss": 0.1708, "step": 4000 }, { "epoch": 0.3193901173465315, "grad_norm": 0.3877600018821886, "learning_rate": 1.5920232499477878e-05, "loss": 0.1724, "step": 4001 }, { "epoch": 0.319469944918975, "grad_norm": 0.3482319545321769, "learning_rate": 1.5918148625282196e-05, "loss": 0.1617, "step": 4002 }, { "epoch": 0.3195497724914185, "grad_norm": 0.3432847659024911, "learning_rate": 1.5916064355481696e-05, "loss": 0.1899, "step": 4003 }, { "epoch": 0.31962960006386204, "grad_norm": 0.27006119354625413, "learning_rate": 1.5913979690215713e-05, "loss": 0.169, "step": 4004 }, { "epoch": 0.31970942763630555, "grad_norm": 0.3159587373739021, "learning_rate": 1.5911894629623587e-05, "loss": 0.193, "step": 4005 }, { "epoch": 0.3197892552087491, "grad_norm": 0.31880685405901626, "learning_rate": 1.5909809173844703e-05, "loss": 0.1852, "step": 4006 }, { "epoch": 0.31986908278119264, "grad_norm": 0.27900527407237286, "learning_rate": 1.5907723323018464e-05, "loss": 0.1678, "step": 4007 }, { "epoch": 0.31994891035363615, "grad_norm": 0.34879545497186865, "learning_rate": 1.5905637077284296e-05, "loss": 0.1533, "step": 4008 }, { "epoch": 0.32002873792607966, "grad_norm": 0.3525375128456388, "learning_rate": 1.5903550436781665e-05, "loss": 0.1834, "step": 4009 }, { "epoch": 0.3201085654985232, "grad_norm": 0.2922169277590276, "learning_rate": 1.590146340165005e-05, "loss": 0.2179, "step": 4010 }, { "epoch": 0.3201883930709667, "grad_norm": 0.34577688734892115, "learning_rate": 1.589937597202896e-05, "loss": 0.2101, "step": 4011 }, { "epoch": 0.3202682206434102, "grad_norm": 0.32598278049868884, "learning_rate": 1.589728814805793e-05, "loss": 0.1528, "step": 4012 }, { "epoch": 0.3203480482158538, "grad_norm": 0.33894213797139217, "learning_rate": 1.5895199929876527e-05, "loss": 0.1601, "step": 4013 }, { "epoch": 0.3204278757882973, "grad_norm": 0.3624411680005909, "learning_rate": 1.589311131762434e-05, "loss": 0.1601, "step": 4014 }, { "epoch": 0.3205077033607408, "grad_norm": 0.2945597309897751, "learning_rate": 1.589102231144098e-05, "loss": 0.2055, "step": 4015 }, { "epoch": 0.3205875309331843, "grad_norm": 0.28176459671343296, "learning_rate": 1.5888932911466097e-05, "loss": 0.1651, "step": 4016 }, { "epoch": 0.32066735850562783, "grad_norm": 0.3437604069815947, "learning_rate": 1.5886843117839352e-05, "loss": 0.2287, "step": 4017 }, { "epoch": 0.32074718607807134, "grad_norm": 0.32655935197127345, "learning_rate": 1.588475293070044e-05, "loss": 0.1872, "step": 4018 }, { "epoch": 0.3208270136505149, "grad_norm": 0.3211804493736286, "learning_rate": 1.588266235018908e-05, "loss": 0.1698, "step": 4019 }, { "epoch": 0.3209068412229584, "grad_norm": 0.31362646350793705, "learning_rate": 1.5880571376445026e-05, "loss": 0.1528, "step": 4020 }, { "epoch": 0.32098666879540194, "grad_norm": 0.2638566498957536, "learning_rate": 1.5878480009608046e-05, "loss": 0.1948, "step": 4021 }, { "epoch": 0.32106649636784546, "grad_norm": 0.35253531252835507, "learning_rate": 1.587638824981794e-05, "loss": 0.1457, "step": 4022 }, { "epoch": 0.32114632394028897, "grad_norm": 0.3330498755359518, "learning_rate": 1.587429609721454e-05, "loss": 0.1681, "step": 4023 }, { "epoch": 0.3212261515127325, "grad_norm": 0.32699259852956747, "learning_rate": 1.5872203551937692e-05, "loss": 0.205, "step": 4024 }, { "epoch": 0.321305979085176, "grad_norm": 0.3070211275562321, "learning_rate": 1.587011061412727e-05, "loss": 0.2008, "step": 4025 }, { "epoch": 0.32138580665761957, "grad_norm": 0.33989721211680546, "learning_rate": 1.5868017283923185e-05, "loss": 0.1675, "step": 4026 }, { "epoch": 0.3214656342300631, "grad_norm": 0.3746603955822216, "learning_rate": 1.5865923561465367e-05, "loss": 0.1899, "step": 4027 }, { "epoch": 0.3215454618025066, "grad_norm": 0.24091943163562898, "learning_rate": 1.5863829446893778e-05, "loss": 0.1773, "step": 4028 }, { "epoch": 0.3216252893749501, "grad_norm": 0.3425599698768121, "learning_rate": 1.586173494034839e-05, "loss": 0.214, "step": 4029 }, { "epoch": 0.3217051169473936, "grad_norm": 0.3285277330906818, "learning_rate": 1.585964004196922e-05, "loss": 0.1825, "step": 4030 }, { "epoch": 0.32178494451983713, "grad_norm": 0.31573425840652175, "learning_rate": 1.5857544751896306e-05, "loss": 0.2313, "step": 4031 }, { "epoch": 0.32186477209228065, "grad_norm": 0.3519963079985257, "learning_rate": 1.5855449070269702e-05, "loss": 0.1967, "step": 4032 }, { "epoch": 0.3219445996647242, "grad_norm": 0.2951764087269709, "learning_rate": 1.5853352997229502e-05, "loss": 0.218, "step": 4033 }, { "epoch": 0.32202442723716773, "grad_norm": 0.33579354687910973, "learning_rate": 1.585125653291582e-05, "loss": 0.1832, "step": 4034 }, { "epoch": 0.32210425480961125, "grad_norm": 0.29048122816699673, "learning_rate": 1.584915967746879e-05, "loss": 0.1465, "step": 4035 }, { "epoch": 0.32218408238205476, "grad_norm": 0.3267829681281019, "learning_rate": 1.5847062431028587e-05, "loss": 0.2056, "step": 4036 }, { "epoch": 0.3222639099544983, "grad_norm": 0.3590317101097327, "learning_rate": 1.5844964793735398e-05, "loss": 0.1767, "step": 4037 }, { "epoch": 0.3223437375269418, "grad_norm": 0.2802964595422545, "learning_rate": 1.5842866765729447e-05, "loss": 0.1929, "step": 4038 }, { "epoch": 0.3224235650993853, "grad_norm": 0.3361920966636533, "learning_rate": 1.5840768347150973e-05, "loss": 0.2096, "step": 4039 }, { "epoch": 0.32250339267182887, "grad_norm": 0.3125779026915075, "learning_rate": 1.5838669538140248e-05, "loss": 0.1572, "step": 4040 }, { "epoch": 0.3225832202442724, "grad_norm": 0.32618085532339275, "learning_rate": 1.5836570338837568e-05, "loss": 0.2275, "step": 4041 }, { "epoch": 0.3226630478167159, "grad_norm": 0.34446877575632834, "learning_rate": 1.5834470749383262e-05, "loss": 0.2013, "step": 4042 }, { "epoch": 0.3227428753891594, "grad_norm": 0.30042508658679673, "learning_rate": 1.5832370769917678e-05, "loss": 0.1645, "step": 4043 }, { "epoch": 0.3228227029616029, "grad_norm": 0.2461387522097612, "learning_rate": 1.5830270400581187e-05, "loss": 0.2026, "step": 4044 }, { "epoch": 0.32290253053404644, "grad_norm": 0.33110382614378553, "learning_rate": 1.582816964151419e-05, "loss": 0.1724, "step": 4045 }, { "epoch": 0.32298235810649, "grad_norm": 0.29438063628414596, "learning_rate": 1.582606849285712e-05, "loss": 0.157, "step": 4046 }, { "epoch": 0.3230621856789335, "grad_norm": 0.3038062137044817, "learning_rate": 1.582396695475043e-05, "loss": 0.1783, "step": 4047 }, { "epoch": 0.32314201325137704, "grad_norm": 0.2794121918994982, "learning_rate": 1.582186502733459e-05, "loss": 0.1609, "step": 4048 }, { "epoch": 0.32322184082382055, "grad_norm": 0.3283054210811467, "learning_rate": 1.581976271075012e-05, "loss": 0.1625, "step": 4049 }, { "epoch": 0.32330166839626406, "grad_norm": 0.29967315267824285, "learning_rate": 1.581766000513754e-05, "loss": 0.1665, "step": 4050 }, { "epoch": 0.3233814959687076, "grad_norm": 0.2973006803754067, "learning_rate": 1.581555691063741e-05, "loss": 0.2022, "step": 4051 }, { "epoch": 0.3234613235411511, "grad_norm": 0.34962796439883087, "learning_rate": 1.5813453427390316e-05, "loss": 0.1936, "step": 4052 }, { "epoch": 0.32354115111359466, "grad_norm": 0.3312209294228682, "learning_rate": 1.581134955553687e-05, "loss": 0.2238, "step": 4053 }, { "epoch": 0.3236209786860382, "grad_norm": 0.29414312179048674, "learning_rate": 1.5809245295217702e-05, "loss": 0.1624, "step": 4054 }, { "epoch": 0.3237008062584817, "grad_norm": 0.279207004728643, "learning_rate": 1.5807140646573475e-05, "loss": 0.1499, "step": 4055 }, { "epoch": 0.3237806338309252, "grad_norm": 0.2908451126879685, "learning_rate": 1.5805035609744878e-05, "loss": 0.1678, "step": 4056 }, { "epoch": 0.3238604614033687, "grad_norm": 0.26747666476859266, "learning_rate": 1.580293018487262e-05, "loss": 0.1505, "step": 4057 }, { "epoch": 0.32394028897581223, "grad_norm": 0.30136327756841164, "learning_rate": 1.580082437209745e-05, "loss": 0.1825, "step": 4058 }, { "epoch": 0.32402011654825574, "grad_norm": 0.30308608554973565, "learning_rate": 1.5798718171560124e-05, "loss": 0.2475, "step": 4059 }, { "epoch": 0.3240999441206993, "grad_norm": 0.36594092658991795, "learning_rate": 1.5796611583401434e-05, "loss": 0.1563, "step": 4060 }, { "epoch": 0.3241797716931428, "grad_norm": 0.3201570297234588, "learning_rate": 1.5794504607762203e-05, "loss": 0.2126, "step": 4061 }, { "epoch": 0.32425959926558634, "grad_norm": 0.3268433304206602, "learning_rate": 1.5792397244783272e-05, "loss": 0.1597, "step": 4062 }, { "epoch": 0.32433942683802985, "grad_norm": 0.30155790868708826, "learning_rate": 1.5790289494605503e-05, "loss": 0.2103, "step": 4063 }, { "epoch": 0.32441925441047337, "grad_norm": 0.3191818466005933, "learning_rate": 1.5788181357369798e-05, "loss": 0.2173, "step": 4064 }, { "epoch": 0.3244990819829169, "grad_norm": 0.3236694995469778, "learning_rate": 1.5786072833217075e-05, "loss": 0.2338, "step": 4065 }, { "epoch": 0.3245789095553604, "grad_norm": 0.30684532398103226, "learning_rate": 1.5783963922288283e-05, "loss": 0.1987, "step": 4066 }, { "epoch": 0.32465873712780396, "grad_norm": 0.2627727518738647, "learning_rate": 1.578185462472439e-05, "loss": 0.1714, "step": 4067 }, { "epoch": 0.3247385647002475, "grad_norm": 0.28073016540361045, "learning_rate": 1.57797449406664e-05, "loss": 0.1672, "step": 4068 }, { "epoch": 0.324818392272691, "grad_norm": 0.34143021775558996, "learning_rate": 1.577763487025533e-05, "loss": 0.1736, "step": 4069 }, { "epoch": 0.3248982198451345, "grad_norm": 0.29998665836203064, "learning_rate": 1.5775524413632233e-05, "loss": 0.1995, "step": 4070 }, { "epoch": 0.324978047417578, "grad_norm": 0.25150527894819735, "learning_rate": 1.5773413570938187e-05, "loss": 0.1776, "step": 4071 }, { "epoch": 0.32505787499002153, "grad_norm": 0.305305631287347, "learning_rate": 1.577130234231429e-05, "loss": 0.1911, "step": 4072 }, { "epoch": 0.3251377025624651, "grad_norm": 0.3935140919664378, "learning_rate": 1.5769190727901674e-05, "loss": 0.1768, "step": 4073 }, { "epoch": 0.3252175301349086, "grad_norm": 0.2818933466095696, "learning_rate": 1.5767078727841486e-05, "loss": 0.1735, "step": 4074 }, { "epoch": 0.32529735770735213, "grad_norm": 0.33453555834765014, "learning_rate": 1.576496634227491e-05, "loss": 0.2282, "step": 4075 }, { "epoch": 0.32537718527979564, "grad_norm": 0.3246013367422234, "learning_rate": 1.5762853571343146e-05, "loss": 0.1795, "step": 4076 }, { "epoch": 0.32545701285223916, "grad_norm": 0.27748967649771006, "learning_rate": 1.576074041518743e-05, "loss": 0.1802, "step": 4077 }, { "epoch": 0.32553684042468267, "grad_norm": 0.29586807943197346, "learning_rate": 1.575862687394901e-05, "loss": 0.1841, "step": 4078 }, { "epoch": 0.3256166679971262, "grad_norm": 0.2950742436985762, "learning_rate": 1.5756512947769177e-05, "loss": 0.1665, "step": 4079 }, { "epoch": 0.32569649556956976, "grad_norm": 0.2718817001830189, "learning_rate": 1.5754398636789233e-05, "loss": 0.1843, "step": 4080 }, { "epoch": 0.32577632314201327, "grad_norm": 0.31809454921205116, "learning_rate": 1.5752283941150514e-05, "loss": 0.1735, "step": 4081 }, { "epoch": 0.3258561507144568, "grad_norm": 0.26772668816552603, "learning_rate": 1.5750168860994377e-05, "loss": 0.1694, "step": 4082 }, { "epoch": 0.3259359782869003, "grad_norm": 0.3020573684875149, "learning_rate": 1.5748053396462205e-05, "loss": 0.1785, "step": 4083 }, { "epoch": 0.3260158058593438, "grad_norm": 0.2669159011851644, "learning_rate": 1.5745937547695416e-05, "loss": 0.1428, "step": 4084 }, { "epoch": 0.3260956334317873, "grad_norm": 0.3011608534616438, "learning_rate": 1.5743821314835435e-05, "loss": 0.1968, "step": 4085 }, { "epoch": 0.32617546100423084, "grad_norm": 0.31795803315951876, "learning_rate": 1.5741704698023738e-05, "loss": 0.1877, "step": 4086 }, { "epoch": 0.3262552885766744, "grad_norm": 0.34393877662497924, "learning_rate": 1.5739587697401797e-05, "loss": 0.1993, "step": 4087 }, { "epoch": 0.3263351161491179, "grad_norm": 0.307551953763417, "learning_rate": 1.5737470313111134e-05, "loss": 0.2033, "step": 4088 }, { "epoch": 0.32641494372156143, "grad_norm": 0.32487489820596943, "learning_rate": 1.5735352545293286e-05, "loss": 0.2058, "step": 4089 }, { "epoch": 0.32649477129400495, "grad_norm": 0.34998605978999126, "learning_rate": 1.573323439408982e-05, "loss": 0.2108, "step": 4090 }, { "epoch": 0.32657459886644846, "grad_norm": 0.3603878624980412, "learning_rate": 1.5731115859642324e-05, "loss": 0.1963, "step": 4091 }, { "epoch": 0.326654426438892, "grad_norm": 0.3829143593318482, "learning_rate": 1.572899694209241e-05, "loss": 0.1406, "step": 4092 }, { "epoch": 0.3267342540113355, "grad_norm": 0.27675343492361326, "learning_rate": 1.5726877641581722e-05, "loss": 0.1886, "step": 4093 }, { "epoch": 0.32681408158377906, "grad_norm": 0.3456966242720891, "learning_rate": 1.572475795825193e-05, "loss": 0.1963, "step": 4094 }, { "epoch": 0.3268939091562226, "grad_norm": 0.3398510663629564, "learning_rate": 1.5722637892244722e-05, "loss": 0.1771, "step": 4095 }, { "epoch": 0.3269737367286661, "grad_norm": 0.3922984784304093, "learning_rate": 1.5720517443701817e-05, "loss": 0.1305, "step": 4096 }, { "epoch": 0.3270535643011096, "grad_norm": 0.3854364879610997, "learning_rate": 1.571839661276496e-05, "loss": 0.1864, "step": 4097 }, { "epoch": 0.3271333918735531, "grad_norm": 0.3031020220040828, "learning_rate": 1.5716275399575923e-05, "loss": 0.2296, "step": 4098 }, { "epoch": 0.32721321944599663, "grad_norm": 0.29077644860390633, "learning_rate": 1.5714153804276494e-05, "loss": 0.1661, "step": 4099 }, { "epoch": 0.32729304701844014, "grad_norm": 0.33552831923957466, "learning_rate": 1.5712031827008498e-05, "loss": 0.1948, "step": 4100 }, { "epoch": 0.3273728745908837, "grad_norm": 0.3555883438204516, "learning_rate": 1.5709909467913778e-05, "loss": 0.1826, "step": 4101 }, { "epoch": 0.3274527021633272, "grad_norm": 0.31242705855309444, "learning_rate": 1.5707786727134208e-05, "loss": 0.1567, "step": 4102 }, { "epoch": 0.32753252973577074, "grad_norm": 0.29776629200841265, "learning_rate": 1.5705663604811682e-05, "loss": 0.2036, "step": 4103 }, { "epoch": 0.32761235730821425, "grad_norm": 0.3465697887969457, "learning_rate": 1.5703540101088126e-05, "loss": 0.1851, "step": 4104 }, { "epoch": 0.32769218488065777, "grad_norm": 0.3296460030461376, "learning_rate": 1.5701416216105487e-05, "loss": 0.1532, "step": 4105 }, { "epoch": 0.3277720124531013, "grad_norm": 0.29076661584821195, "learning_rate": 1.569929195000573e-05, "loss": 0.1947, "step": 4106 }, { "epoch": 0.32785184002554485, "grad_norm": 0.31343676954574845, "learning_rate": 1.5697167302930863e-05, "loss": 0.2287, "step": 4107 }, { "epoch": 0.32793166759798836, "grad_norm": 0.3950106190585418, "learning_rate": 1.569504227502291e-05, "loss": 0.2061, "step": 4108 }, { "epoch": 0.3280114951704319, "grad_norm": 0.3590595774596323, "learning_rate": 1.569291686642392e-05, "loss": 0.1599, "step": 4109 }, { "epoch": 0.3280913227428754, "grad_norm": 0.3035446879459692, "learning_rate": 1.5690791077275962e-05, "loss": 0.1986, "step": 4110 }, { "epoch": 0.3281711503153189, "grad_norm": 0.4362941894674583, "learning_rate": 1.5688664907721146e-05, "loss": 0.1664, "step": 4111 }, { "epoch": 0.3282509778877624, "grad_norm": 0.3198164924042565, "learning_rate": 1.5686538357901592e-05, "loss": 0.2706, "step": 4112 }, { "epoch": 0.32833080546020593, "grad_norm": 0.2795123220990841, "learning_rate": 1.568441142795945e-05, "loss": 0.1347, "step": 4113 }, { "epoch": 0.3284106330326495, "grad_norm": 0.3454505475121623, "learning_rate": 1.5682284118036904e-05, "loss": 0.2007, "step": 4114 }, { "epoch": 0.328490460605093, "grad_norm": 0.3517499822220135, "learning_rate": 1.5680156428276147e-05, "loss": 0.1946, "step": 4115 }, { "epoch": 0.32857028817753653, "grad_norm": 0.3534134857333253, "learning_rate": 1.5678028358819414e-05, "loss": 0.1623, "step": 4116 }, { "epoch": 0.32865011574998004, "grad_norm": 0.32451244442655475, "learning_rate": 1.567589990980896e-05, "loss": 0.1879, "step": 4117 }, { "epoch": 0.32872994332242356, "grad_norm": 0.2922898717499741, "learning_rate": 1.5673771081387052e-05, "loss": 0.1771, "step": 4118 }, { "epoch": 0.32880977089486707, "grad_norm": 0.3857521930185625, "learning_rate": 1.5671641873696002e-05, "loss": 0.1468, "step": 4119 }, { "epoch": 0.3288895984673106, "grad_norm": 0.3383431072830323, "learning_rate": 1.5669512286878142e-05, "loss": 0.1773, "step": 4120 }, { "epoch": 0.32896942603975415, "grad_norm": 0.28484128867065467, "learning_rate": 1.566738232107582e-05, "loss": 0.2306, "step": 4121 }, { "epoch": 0.32904925361219767, "grad_norm": 0.3525624638571305, "learning_rate": 1.566525197643142e-05, "loss": 0.1938, "step": 4122 }, { "epoch": 0.3291290811846412, "grad_norm": 0.3519548116863774, "learning_rate": 1.5663121253087343e-05, "loss": 0.2086, "step": 4123 }, { "epoch": 0.3292089087570847, "grad_norm": 0.2959802289021054, "learning_rate": 1.566099015118602e-05, "loss": 0.1793, "step": 4124 }, { "epoch": 0.3292887363295282, "grad_norm": 0.34254229430996846, "learning_rate": 1.5658858670869907e-05, "loss": 0.1682, "step": 4125 }, { "epoch": 0.3293685639019717, "grad_norm": 0.3328691371986788, "learning_rate": 1.565672681228149e-05, "loss": 0.1517, "step": 4126 }, { "epoch": 0.32944839147441524, "grad_norm": 0.29161685230522, "learning_rate": 1.565459457556327e-05, "loss": 0.2098, "step": 4127 }, { "epoch": 0.3295282190468588, "grad_norm": 0.3422879505387846, "learning_rate": 1.5652461960857783e-05, "loss": 0.1764, "step": 4128 }, { "epoch": 0.3296080466193023, "grad_norm": 0.4128000419394897, "learning_rate": 1.5650328968307582e-05, "loss": 0.2176, "step": 4129 }, { "epoch": 0.32968787419174583, "grad_norm": 0.2848305463268323, "learning_rate": 1.5648195598055246e-05, "loss": 0.1846, "step": 4130 }, { "epoch": 0.32976770176418935, "grad_norm": 0.28466415405062456, "learning_rate": 1.564606185024339e-05, "loss": 0.1408, "step": 4131 }, { "epoch": 0.32984752933663286, "grad_norm": 0.30640499380110764, "learning_rate": 1.5643927725014644e-05, "loss": 0.1626, "step": 4132 }, { "epoch": 0.3299273569090764, "grad_norm": 0.3383420300951966, "learning_rate": 1.5641793222511666e-05, "loss": 0.1718, "step": 4133 }, { "epoch": 0.33000718448151994, "grad_norm": 0.3180824045443555, "learning_rate": 1.5639658342877135e-05, "loss": 0.1779, "step": 4134 }, { "epoch": 0.33008701205396346, "grad_norm": 0.2715659052395829, "learning_rate": 1.5637523086253765e-05, "loss": 0.17, "step": 4135 }, { "epoch": 0.33016683962640697, "grad_norm": 0.3432150098525914, "learning_rate": 1.563538745278428e-05, "loss": 0.1546, "step": 4136 }, { "epoch": 0.3302466671988505, "grad_norm": 0.35667243845570196, "learning_rate": 1.5633251442611453e-05, "loss": 0.2413, "step": 4137 }, { "epoch": 0.330326494771294, "grad_norm": 0.330582060674981, "learning_rate": 1.563111505587806e-05, "loss": 0.1964, "step": 4138 }, { "epoch": 0.3304063223437375, "grad_norm": 0.32031759629223083, "learning_rate": 1.5628978292726908e-05, "loss": 0.1674, "step": 4139 }, { "epoch": 0.330486149916181, "grad_norm": 0.3478570247028543, "learning_rate": 1.5626841153300834e-05, "loss": 0.2297, "step": 4140 }, { "epoch": 0.3305659774886246, "grad_norm": 0.2875494584802541, "learning_rate": 1.5624703637742695e-05, "loss": 0.1891, "step": 4141 }, { "epoch": 0.3306458050610681, "grad_norm": 0.2653092967032055, "learning_rate": 1.562256574619538e-05, "loss": 0.163, "step": 4142 }, { "epoch": 0.3307256326335116, "grad_norm": 0.42371461873836747, "learning_rate": 1.5620427478801793e-05, "loss": 0.2235, "step": 4143 }, { "epoch": 0.33080546020595514, "grad_norm": 0.34617385577044296, "learning_rate": 1.561828883570487e-05, "loss": 0.1764, "step": 4144 }, { "epoch": 0.33088528777839865, "grad_norm": 0.3676555370564631, "learning_rate": 1.5616149817047576e-05, "loss": 0.1942, "step": 4145 }, { "epoch": 0.33096511535084217, "grad_norm": 0.3265579715892366, "learning_rate": 1.561401042297289e-05, "loss": 0.1818, "step": 4146 }, { "epoch": 0.3310449429232857, "grad_norm": 0.31566013554646544, "learning_rate": 1.5611870653623826e-05, "loss": 0.1533, "step": 4147 }, { "epoch": 0.33112477049572925, "grad_norm": 0.3092317020590798, "learning_rate": 1.5609730509143416e-05, "loss": 0.177, "step": 4148 }, { "epoch": 0.33120459806817276, "grad_norm": 0.33182028336545966, "learning_rate": 1.560758998967472e-05, "loss": 0.1929, "step": 4149 }, { "epoch": 0.3312844256406163, "grad_norm": 0.27527591021397324, "learning_rate": 1.5605449095360828e-05, "loss": 0.1898, "step": 4150 }, { "epoch": 0.3313642532130598, "grad_norm": 0.33925189311307125, "learning_rate": 1.5603307826344848e-05, "loss": 0.1964, "step": 4151 }, { "epoch": 0.3314440807855033, "grad_norm": 0.36083178685550227, "learning_rate": 1.5601166182769912e-05, "loss": 0.1667, "step": 4152 }, { "epoch": 0.3315239083579468, "grad_norm": 0.343940591000769, "learning_rate": 1.559902416477918e-05, "loss": 0.1407, "step": 4153 }, { "epoch": 0.33160373593039033, "grad_norm": 0.31191318779746696, "learning_rate": 1.5596881772515845e-05, "loss": 0.1849, "step": 4154 }, { "epoch": 0.3316835635028339, "grad_norm": 0.37053383945888085, "learning_rate": 1.559473900612311e-05, "loss": 0.1979, "step": 4155 }, { "epoch": 0.3317633910752774, "grad_norm": 0.3216891551696449, "learning_rate": 1.5592595865744217e-05, "loss": 0.187, "step": 4156 }, { "epoch": 0.33184321864772093, "grad_norm": 0.2870453518979332, "learning_rate": 1.559045235152242e-05, "loss": 0.148, "step": 4157 }, { "epoch": 0.33192304622016444, "grad_norm": 0.37514257998061984, "learning_rate": 1.5588308463601007e-05, "loss": 0.1815, "step": 4158 }, { "epoch": 0.33200287379260796, "grad_norm": 0.30306412709733027, "learning_rate": 1.558616420212329e-05, "loss": 0.2275, "step": 4159 }, { "epoch": 0.33208270136505147, "grad_norm": 0.32507663849688506, "learning_rate": 1.55840195672326e-05, "loss": 0.1894, "step": 4160 }, { "epoch": 0.33216252893749504, "grad_norm": 0.34911474141264975, "learning_rate": 1.5581874559072302e-05, "loss": 0.1956, "step": 4161 }, { "epoch": 0.33224235650993855, "grad_norm": 0.33375523689335096, "learning_rate": 1.5579729177785777e-05, "loss": 0.1963, "step": 4162 }, { "epoch": 0.33232218408238207, "grad_norm": 0.32859632946722883, "learning_rate": 1.557758342351644e-05, "loss": 0.1807, "step": 4163 }, { "epoch": 0.3324020116548256, "grad_norm": 0.28386949922789323, "learning_rate": 1.5575437296407726e-05, "loss": 0.1652, "step": 4164 }, { "epoch": 0.3324818392272691, "grad_norm": 0.36205163494656645, "learning_rate": 1.557329079660309e-05, "loss": 0.1647, "step": 4165 }, { "epoch": 0.3325616667997126, "grad_norm": 0.3229090658048405, "learning_rate": 1.5571143924246023e-05, "loss": 0.1615, "step": 4166 }, { "epoch": 0.3326414943721561, "grad_norm": 0.2899937272751568, "learning_rate": 1.5568996679480027e-05, "loss": 0.1968, "step": 4167 }, { "epoch": 0.3327213219445997, "grad_norm": 0.293905942706719, "learning_rate": 1.5566849062448648e-05, "loss": 0.1771, "step": 4168 }, { "epoch": 0.3328011495170432, "grad_norm": 0.31198723428707226, "learning_rate": 1.5564701073295438e-05, "loss": 0.1421, "step": 4169 }, { "epoch": 0.3328809770894867, "grad_norm": 0.2873212970159234, "learning_rate": 1.5562552712163978e-05, "loss": 0.2109, "step": 4170 }, { "epoch": 0.33296080466193023, "grad_norm": 0.2853333975625798, "learning_rate": 1.5560403979197886e-05, "loss": 0.1707, "step": 4171 }, { "epoch": 0.33304063223437375, "grad_norm": 0.29691435497956387, "learning_rate": 1.5558254874540797e-05, "loss": 0.1738, "step": 4172 }, { "epoch": 0.33312045980681726, "grad_norm": 0.32228394230205093, "learning_rate": 1.555610539833636e-05, "loss": 0.201, "step": 4173 }, { "epoch": 0.3332002873792608, "grad_norm": 0.31548112812266943, "learning_rate": 1.5553955550728267e-05, "loss": 0.222, "step": 4174 }, { "epoch": 0.33328011495170434, "grad_norm": 0.3707769941256331, "learning_rate": 1.5551805331860227e-05, "loss": 0.1926, "step": 4175 }, { "epoch": 0.33335994252414786, "grad_norm": 0.2922318737635044, "learning_rate": 1.554965474187597e-05, "loss": 0.1872, "step": 4176 }, { "epoch": 0.33343977009659137, "grad_norm": 0.3613117320514181, "learning_rate": 1.5547503780919252e-05, "loss": 0.2022, "step": 4177 }, { "epoch": 0.3335195976690349, "grad_norm": 0.34046985903191096, "learning_rate": 1.5545352449133864e-05, "loss": 0.1687, "step": 4178 }, { "epoch": 0.3335994252414784, "grad_norm": 0.33711850260175513, "learning_rate": 1.554320074666361e-05, "loss": 0.1782, "step": 4179 }, { "epoch": 0.3336792528139219, "grad_norm": 0.2792105625185887, "learning_rate": 1.5541048673652324e-05, "loss": 0.1465, "step": 4180 }, { "epoch": 0.3337590803863654, "grad_norm": 0.2702428739400167, "learning_rate": 1.5538896230243857e-05, "loss": 0.2084, "step": 4181 }, { "epoch": 0.333838907958809, "grad_norm": 0.3145389686435095, "learning_rate": 1.5536743416582104e-05, "loss": 0.2313, "step": 4182 }, { "epoch": 0.3339187355312525, "grad_norm": 0.2955789505347922, "learning_rate": 1.553459023281096e-05, "loss": 0.249, "step": 4183 }, { "epoch": 0.333998563103696, "grad_norm": 0.2766124349409926, "learning_rate": 1.5532436679074363e-05, "loss": 0.1634, "step": 4184 }, { "epoch": 0.33407839067613954, "grad_norm": 0.33565044840744895, "learning_rate": 1.553028275551627e-05, "loss": 0.1698, "step": 4185 }, { "epoch": 0.33415821824858305, "grad_norm": 0.3182285705728838, "learning_rate": 1.5528128462280656e-05, "loss": 0.1936, "step": 4186 }, { "epoch": 0.33423804582102656, "grad_norm": 0.2665337542983386, "learning_rate": 1.552597379951154e-05, "loss": 0.2079, "step": 4187 }, { "epoch": 0.3343178733934701, "grad_norm": 0.32026734007988533, "learning_rate": 1.5523818767352935e-05, "loss": 0.1885, "step": 4188 }, { "epoch": 0.33439770096591365, "grad_norm": 0.3602540829759997, "learning_rate": 1.552166336594891e-05, "loss": 0.1658, "step": 4189 }, { "epoch": 0.33447752853835716, "grad_norm": 0.31098837033509075, "learning_rate": 1.5519507595443537e-05, "loss": 0.2115, "step": 4190 }, { "epoch": 0.3345573561108007, "grad_norm": 0.3169381060114191, "learning_rate": 1.5517351455980926e-05, "loss": 0.1891, "step": 4191 }, { "epoch": 0.3346371836832442, "grad_norm": 0.3324508474768619, "learning_rate": 1.5515194947705205e-05, "loss": 0.2384, "step": 4192 }, { "epoch": 0.3347170112556877, "grad_norm": 0.31898163412310376, "learning_rate": 1.5513038070760525e-05, "loss": 0.1907, "step": 4193 }, { "epoch": 0.3347968388281312, "grad_norm": 0.34991508561652024, "learning_rate": 1.551088082529107e-05, "loss": 0.225, "step": 4194 }, { "epoch": 0.3348766664005748, "grad_norm": 0.3069387591309501, "learning_rate": 1.5508723211441042e-05, "loss": 0.1565, "step": 4195 }, { "epoch": 0.3349564939730183, "grad_norm": 0.24273772659531115, "learning_rate": 1.5506565229354668e-05, "loss": 0.1964, "step": 4196 }, { "epoch": 0.3350363215454618, "grad_norm": 0.3150027816290778, "learning_rate": 1.55044068791762e-05, "loss": 0.1829, "step": 4197 }, { "epoch": 0.3351161491179053, "grad_norm": 0.304570948674493, "learning_rate": 1.550224816104991e-05, "loss": 0.1639, "step": 4198 }, { "epoch": 0.33519597669034884, "grad_norm": 0.33408836197263214, "learning_rate": 1.5500089075120114e-05, "loss": 0.1781, "step": 4199 }, { "epoch": 0.33527580426279235, "grad_norm": 0.2888294092729188, "learning_rate": 1.5497929621531125e-05, "loss": 0.2146, "step": 4200 }, { "epoch": 0.33535563183523587, "grad_norm": 0.28594476655825013, "learning_rate": 1.5495769800427297e-05, "loss": 0.1922, "step": 4201 }, { "epoch": 0.33543545940767944, "grad_norm": 0.3679874902197786, "learning_rate": 1.5493609611953008e-05, "loss": 0.2079, "step": 4202 }, { "epoch": 0.33551528698012295, "grad_norm": 0.2855991610364339, "learning_rate": 1.5491449056252657e-05, "loss": 0.2087, "step": 4203 }, { "epoch": 0.33559511455256646, "grad_norm": 0.3348001624349213, "learning_rate": 1.548928813347067e-05, "loss": 0.1724, "step": 4204 }, { "epoch": 0.33567494212501, "grad_norm": 0.33215311225315264, "learning_rate": 1.548712684375149e-05, "loss": 0.2262, "step": 4205 }, { "epoch": 0.3357547696974535, "grad_norm": 0.29903460227194634, "learning_rate": 1.54849651872396e-05, "loss": 0.2523, "step": 4206 }, { "epoch": 0.335834597269897, "grad_norm": 0.31151634255394145, "learning_rate": 1.5482803164079493e-05, "loss": 0.2084, "step": 4207 }, { "epoch": 0.3359144248423405, "grad_norm": 0.3530856371545729, "learning_rate": 1.5480640774415693e-05, "loss": 0.1882, "step": 4208 }, { "epoch": 0.3359942524147841, "grad_norm": 0.37584577817587655, "learning_rate": 1.5478478018392742e-05, "loss": 0.146, "step": 4209 }, { "epoch": 0.3360740799872276, "grad_norm": 0.31694881316808354, "learning_rate": 1.547631489615522e-05, "loss": 0.159, "step": 4210 }, { "epoch": 0.3361539075596711, "grad_norm": 0.31748809241918213, "learning_rate": 1.547415140784772e-05, "loss": 0.1786, "step": 4211 }, { "epoch": 0.33623373513211463, "grad_norm": 0.2878582885490682, "learning_rate": 1.547198755361486e-05, "loss": 0.2212, "step": 4212 }, { "epoch": 0.33631356270455814, "grad_norm": 0.2999052995270133, "learning_rate": 1.546982333360129e-05, "loss": 0.209, "step": 4213 }, { "epoch": 0.33639339027700166, "grad_norm": 0.2984018452707327, "learning_rate": 1.5467658747951674e-05, "loss": 0.1766, "step": 4214 }, { "epoch": 0.33647321784944517, "grad_norm": 0.3299535158599034, "learning_rate": 1.5465493796810704e-05, "loss": 0.1886, "step": 4215 }, { "epoch": 0.33655304542188874, "grad_norm": 0.32539533390093184, "learning_rate": 1.546332848032311e-05, "loss": 0.1955, "step": 4216 }, { "epoch": 0.33663287299433226, "grad_norm": 0.2668500384435885, "learning_rate": 1.5461162798633623e-05, "loss": 0.1773, "step": 4217 }, { "epoch": 0.33671270056677577, "grad_norm": 0.30600904953338803, "learning_rate": 1.5458996751887018e-05, "loss": 0.1645, "step": 4218 }, { "epoch": 0.3367925281392193, "grad_norm": 0.43057417811246623, "learning_rate": 1.5456830340228083e-05, "loss": 0.1912, "step": 4219 }, { "epoch": 0.3368723557116628, "grad_norm": 0.40211864111618595, "learning_rate": 1.5454663563801635e-05, "loss": 0.2203, "step": 4220 }, { "epoch": 0.3369521832841063, "grad_norm": 0.38006950184835886, "learning_rate": 1.5452496422752515e-05, "loss": 0.1668, "step": 4221 }, { "epoch": 0.3370320108565499, "grad_norm": 0.2781554003669351, "learning_rate": 1.5450328917225585e-05, "loss": 0.1665, "step": 4222 }, { "epoch": 0.3371118384289934, "grad_norm": 0.2960491670238735, "learning_rate": 1.5448161047365736e-05, "loss": 0.2216, "step": 4223 }, { "epoch": 0.3371916660014369, "grad_norm": 0.3550018398135809, "learning_rate": 1.5445992813317884e-05, "loss": 0.1606, "step": 4224 }, { "epoch": 0.3372714935738804, "grad_norm": 0.34071159284758834, "learning_rate": 1.5443824215226964e-05, "loss": 0.1987, "step": 4225 }, { "epoch": 0.33735132114632393, "grad_norm": 0.30992874885878985, "learning_rate": 1.5441655253237937e-05, "loss": 0.1919, "step": 4226 }, { "epoch": 0.33743114871876745, "grad_norm": 0.24641075271936771, "learning_rate": 1.543948592749579e-05, "loss": 0.1718, "step": 4227 }, { "epoch": 0.33751097629121096, "grad_norm": 0.2958471925831916, "learning_rate": 1.5437316238145538e-05, "loss": 0.196, "step": 4228 }, { "epoch": 0.33759080386365453, "grad_norm": 0.35154424588076366, "learning_rate": 1.543514618533221e-05, "loss": 0.1795, "step": 4229 }, { "epoch": 0.33767063143609805, "grad_norm": 0.30471084878394694, "learning_rate": 1.5432975769200873e-05, "loss": 0.1789, "step": 4230 }, { "epoch": 0.33775045900854156, "grad_norm": 0.2978666762041338, "learning_rate": 1.5430804989896606e-05, "loss": 0.1881, "step": 4231 }, { "epoch": 0.3378302865809851, "grad_norm": 0.2609736440012928, "learning_rate": 1.5428633847564518e-05, "loss": 0.1584, "step": 4232 }, { "epoch": 0.3379101141534286, "grad_norm": 0.3069926165673516, "learning_rate": 1.5426462342349736e-05, "loss": 0.1881, "step": 4233 }, { "epoch": 0.3379899417258721, "grad_norm": 0.3563606755508727, "learning_rate": 1.5424290474397424e-05, "loss": 0.1887, "step": 4234 }, { "epoch": 0.3380697692983156, "grad_norm": 0.3256156003470817, "learning_rate": 1.542211824385276e-05, "loss": 0.1693, "step": 4235 }, { "epoch": 0.3381495968707592, "grad_norm": 0.2945787965507447, "learning_rate": 1.541994565086095e-05, "loss": 0.2392, "step": 4236 }, { "epoch": 0.3382294244432027, "grad_norm": 0.4891869389987488, "learning_rate": 1.541777269556722e-05, "loss": 0.2106, "step": 4237 }, { "epoch": 0.3383092520156462, "grad_norm": 0.3157137557100618, "learning_rate": 1.5415599378116825e-05, "loss": 0.1374, "step": 4238 }, { "epoch": 0.3383890795880897, "grad_norm": 0.3076495401240401, "learning_rate": 1.5413425698655047e-05, "loss": 0.1615, "step": 4239 }, { "epoch": 0.33846890716053324, "grad_norm": 0.3000366754336619, "learning_rate": 1.5411251657327185e-05, "loss": 0.1901, "step": 4240 }, { "epoch": 0.33854873473297675, "grad_norm": 0.30333185088231734, "learning_rate": 1.540907725427856e-05, "loss": 0.2486, "step": 4241 }, { "epoch": 0.33862856230542027, "grad_norm": 0.3656177034899878, "learning_rate": 1.540690248965453e-05, "loss": 0.1896, "step": 4242 }, { "epoch": 0.33870838987786384, "grad_norm": 0.3319918368376026, "learning_rate": 1.5404727363600462e-05, "loss": 0.2176, "step": 4243 }, { "epoch": 0.33878821745030735, "grad_norm": 0.31411462127763873, "learning_rate": 1.5402551876261764e-05, "loss": 0.1545, "step": 4244 }, { "epoch": 0.33886804502275086, "grad_norm": 0.33734949930159697, "learning_rate": 1.5400376027783846e-05, "loss": 0.1678, "step": 4245 }, { "epoch": 0.3389478725951944, "grad_norm": 0.32863250226931534, "learning_rate": 1.5398199818312166e-05, "loss": 0.1684, "step": 4246 }, { "epoch": 0.3390277001676379, "grad_norm": 0.3358929337549648, "learning_rate": 1.5396023247992195e-05, "loss": 0.2288, "step": 4247 }, { "epoch": 0.3391075277400814, "grad_norm": 0.3475969730862525, "learning_rate": 1.539384631696942e-05, "loss": 0.206, "step": 4248 }, { "epoch": 0.3391873553125249, "grad_norm": 0.36845640257817935, "learning_rate": 1.5391669025389364e-05, "loss": 0.1736, "step": 4249 }, { "epoch": 0.3392671828849685, "grad_norm": 0.282263556268136, "learning_rate": 1.5389491373397572e-05, "loss": 0.1861, "step": 4250 }, { "epoch": 0.339347010457412, "grad_norm": 0.34657640329132106, "learning_rate": 1.538731336113961e-05, "loss": 0.2032, "step": 4251 }, { "epoch": 0.3394268380298555, "grad_norm": 0.30626574460976996, "learning_rate": 1.5385134988761067e-05, "loss": 0.1883, "step": 4252 }, { "epoch": 0.33950666560229903, "grad_norm": 0.32772108392028215, "learning_rate": 1.5382956256407564e-05, "loss": 0.2014, "step": 4253 }, { "epoch": 0.33958649317474254, "grad_norm": 0.3313523566268601, "learning_rate": 1.5380777164224737e-05, "loss": 0.2027, "step": 4254 }, { "epoch": 0.33966632074718606, "grad_norm": 0.27721910982447723, "learning_rate": 1.5378597712358253e-05, "loss": 0.1976, "step": 4255 }, { "epoch": 0.3397461483196296, "grad_norm": 0.2830694795584243, "learning_rate": 1.5376417900953795e-05, "loss": 0.1756, "step": 4256 }, { "epoch": 0.33982597589207314, "grad_norm": 0.29050418804847733, "learning_rate": 1.537423773015708e-05, "loss": 0.1584, "step": 4257 }, { "epoch": 0.33990580346451665, "grad_norm": 0.29377219964029366, "learning_rate": 1.5372057200113838e-05, "loss": 0.1327, "step": 4258 }, { "epoch": 0.33998563103696017, "grad_norm": 0.318039418483187, "learning_rate": 1.536987631096983e-05, "loss": 0.1901, "step": 4259 }, { "epoch": 0.3400654586094037, "grad_norm": 0.3358865721522435, "learning_rate": 1.5367695062870846e-05, "loss": 0.152, "step": 4260 }, { "epoch": 0.3401452861818472, "grad_norm": 0.26407904827574086, "learning_rate": 1.5365513455962687e-05, "loss": 0.1702, "step": 4261 }, { "epoch": 0.3402251137542907, "grad_norm": 0.30632395765021136, "learning_rate": 1.5363331490391186e-05, "loss": 0.1505, "step": 4262 }, { "epoch": 0.3403049413267343, "grad_norm": 0.2756531074902728, "learning_rate": 1.5361149166302202e-05, "loss": 0.1803, "step": 4263 }, { "epoch": 0.3403847688991778, "grad_norm": 0.32927964465401993, "learning_rate": 1.535896648384161e-05, "loss": 0.1418, "step": 4264 }, { "epoch": 0.3404645964716213, "grad_norm": 0.2996748592112855, "learning_rate": 1.5356783443155317e-05, "loss": 0.1974, "step": 4265 }, { "epoch": 0.3405444240440648, "grad_norm": 0.31349432568460134, "learning_rate": 1.5354600044389253e-05, "loss": 0.1822, "step": 4266 }, { "epoch": 0.34062425161650833, "grad_norm": 0.2852941766894278, "learning_rate": 1.5352416287689363e-05, "loss": 0.1748, "step": 4267 }, { "epoch": 0.34070407918895185, "grad_norm": 0.34054350162496394, "learning_rate": 1.5350232173201627e-05, "loss": 0.2147, "step": 4268 }, { "epoch": 0.34078390676139536, "grad_norm": 0.30239516055235977, "learning_rate": 1.534804770107204e-05, "loss": 0.1784, "step": 4269 }, { "epoch": 0.34086373433383893, "grad_norm": 0.24799962014483914, "learning_rate": 1.5345862871446637e-05, "loss": 0.1839, "step": 4270 }, { "epoch": 0.34094356190628244, "grad_norm": 0.30116304115089865, "learning_rate": 1.534367768447145e-05, "loss": 0.1684, "step": 4271 }, { "epoch": 0.34102338947872596, "grad_norm": 0.28757371792399566, "learning_rate": 1.534149214029256e-05, "loss": 0.2264, "step": 4272 }, { "epoch": 0.34110321705116947, "grad_norm": 0.35530143279188564, "learning_rate": 1.533930623905606e-05, "loss": 0.2092, "step": 4273 }, { "epoch": 0.341183044623613, "grad_norm": 0.2685998982698338, "learning_rate": 1.5337119980908064e-05, "loss": 0.229, "step": 4274 }, { "epoch": 0.3412628721960565, "grad_norm": 0.36436099574240205, "learning_rate": 1.533493336599472e-05, "loss": 0.1659, "step": 4275 }, { "epoch": 0.3413426997685, "grad_norm": 0.42534725481716074, "learning_rate": 1.5332746394462192e-05, "loss": 0.1793, "step": 4276 }, { "epoch": 0.3414225273409436, "grad_norm": 0.31518264578777694, "learning_rate": 1.5330559066456673e-05, "loss": 0.1745, "step": 4277 }, { "epoch": 0.3415023549133871, "grad_norm": 0.3501137075632433, "learning_rate": 1.5328371382124373e-05, "loss": 0.1831, "step": 4278 }, { "epoch": 0.3415821824858306, "grad_norm": 0.24764888123740705, "learning_rate": 1.5326183341611538e-05, "loss": 0.2093, "step": 4279 }, { "epoch": 0.3416620100582741, "grad_norm": 0.3335010201156387, "learning_rate": 1.532399494506442e-05, "loss": 0.2612, "step": 4280 }, { "epoch": 0.34174183763071764, "grad_norm": 0.2979650577085377, "learning_rate": 1.532180619262931e-05, "loss": 0.1643, "step": 4281 }, { "epoch": 0.34182166520316115, "grad_norm": 0.2642402890576368, "learning_rate": 1.5319617084452517e-05, "loss": 0.1792, "step": 4282 }, { "epoch": 0.3419014927756047, "grad_norm": 0.32867456789535565, "learning_rate": 1.5317427620680377e-05, "loss": 0.2106, "step": 4283 }, { "epoch": 0.34198132034804823, "grad_norm": 0.3122940658916636, "learning_rate": 1.531523780145924e-05, "loss": 0.184, "step": 4284 }, { "epoch": 0.34206114792049175, "grad_norm": 0.3087592885544258, "learning_rate": 1.5313047626935494e-05, "loss": 0.1595, "step": 4285 }, { "epoch": 0.34214097549293526, "grad_norm": 0.3042506998543318, "learning_rate": 1.5310857097255535e-05, "loss": 0.158, "step": 4286 }, { "epoch": 0.3422208030653788, "grad_norm": 0.27642428459938384, "learning_rate": 1.53086662125658e-05, "loss": 0.1472, "step": 4287 }, { "epoch": 0.3423006306378223, "grad_norm": 0.28040240890089546, "learning_rate": 1.5306474973012736e-05, "loss": 0.1616, "step": 4288 }, { "epoch": 0.3423804582102658, "grad_norm": 0.3007324883941264, "learning_rate": 1.5304283378742822e-05, "loss": 0.2016, "step": 4289 }, { "epoch": 0.3424602857827094, "grad_norm": 0.3175099813828796, "learning_rate": 1.5302091429902552e-05, "loss": 0.2834, "step": 4290 }, { "epoch": 0.3425401133551529, "grad_norm": 0.3055628905409725, "learning_rate": 1.5299899126638456e-05, "loss": 0.2106, "step": 4291 }, { "epoch": 0.3426199409275964, "grad_norm": 0.26209552034414396, "learning_rate": 1.529770646909708e-05, "loss": 0.1578, "step": 4292 }, { "epoch": 0.3426997685000399, "grad_norm": 0.33993731388776705, "learning_rate": 1.5295513457424983e-05, "loss": 0.1791, "step": 4293 }, { "epoch": 0.34277959607248343, "grad_norm": 0.33578423759301784, "learning_rate": 1.5293320091768772e-05, "loss": 0.1724, "step": 4294 }, { "epoch": 0.34285942364492694, "grad_norm": 0.3113786201677466, "learning_rate": 1.529112637227506e-05, "loss": 0.2017, "step": 4295 }, { "epoch": 0.34293925121737046, "grad_norm": 0.27690100770091397, "learning_rate": 1.5288932299090494e-05, "loss": 0.1779, "step": 4296 }, { "epoch": 0.343019078789814, "grad_norm": 0.3196010890171405, "learning_rate": 1.528673787236173e-05, "loss": 0.1589, "step": 4297 }, { "epoch": 0.34309890636225754, "grad_norm": 0.31202035840264475, "learning_rate": 1.5284543092235464e-05, "loss": 0.1574, "step": 4298 }, { "epoch": 0.34317873393470105, "grad_norm": 0.30788823602482934, "learning_rate": 1.5282347958858403e-05, "loss": 0.2131, "step": 4299 }, { "epoch": 0.34325856150714457, "grad_norm": 0.294756585515815, "learning_rate": 1.5280152472377286e-05, "loss": 0.2397, "step": 4300 }, { "epoch": 0.3433383890795881, "grad_norm": 0.28563563519456425, "learning_rate": 1.5277956632938873e-05, "loss": 0.1666, "step": 4301 }, { "epoch": 0.3434182166520316, "grad_norm": 0.3083640882955131, "learning_rate": 1.5275760440689942e-05, "loss": 0.1802, "step": 4302 }, { "epoch": 0.3434980442244751, "grad_norm": 0.32687279378041545, "learning_rate": 1.527356389577731e-05, "loss": 0.1815, "step": 4303 }, { "epoch": 0.3435778717969187, "grad_norm": 0.26407702096539476, "learning_rate": 1.5271366998347795e-05, "loss": 0.1802, "step": 4304 }, { "epoch": 0.3436576993693622, "grad_norm": 0.3134351034132433, "learning_rate": 1.5269169748548262e-05, "loss": 0.1851, "step": 4305 }, { "epoch": 0.3437375269418057, "grad_norm": 0.3044876036163513, "learning_rate": 1.5266972146525585e-05, "loss": 0.1706, "step": 4306 }, { "epoch": 0.3438173545142492, "grad_norm": 0.4298821447890257, "learning_rate": 1.526477419242666e-05, "loss": 0.2159, "step": 4307 }, { "epoch": 0.34389718208669273, "grad_norm": 0.3056531849040232, "learning_rate": 1.5262575886398417e-05, "loss": 0.2038, "step": 4308 }, { "epoch": 0.34397700965913625, "grad_norm": 0.4210898697126653, "learning_rate": 1.5260377228587803e-05, "loss": 0.1736, "step": 4309 }, { "epoch": 0.3440568372315798, "grad_norm": 0.29436344960549626, "learning_rate": 1.5258178219141785e-05, "loss": 0.2188, "step": 4310 }, { "epoch": 0.34413666480402333, "grad_norm": 0.3763788599982711, "learning_rate": 1.525597885820736e-05, "loss": 0.1668, "step": 4311 }, { "epoch": 0.34421649237646684, "grad_norm": 0.4248581664758213, "learning_rate": 1.5253779145931553e-05, "loss": 0.1721, "step": 4312 }, { "epoch": 0.34429631994891036, "grad_norm": 0.38043607028923, "learning_rate": 1.5251579082461403e-05, "loss": 0.1552, "step": 4313 }, { "epoch": 0.34437614752135387, "grad_norm": 0.39991883879975043, "learning_rate": 1.5249378667943973e-05, "loss": 0.1896, "step": 4314 }, { "epoch": 0.3444559750937974, "grad_norm": 0.3820240369094609, "learning_rate": 1.524717790252635e-05, "loss": 0.1649, "step": 4315 }, { "epoch": 0.3445358026662409, "grad_norm": 0.39288340685934237, "learning_rate": 1.5244976786355654e-05, "loss": 0.1727, "step": 4316 }, { "epoch": 0.34461563023868447, "grad_norm": 0.3349454517168851, "learning_rate": 1.5242775319579011e-05, "loss": 0.1965, "step": 4317 }, { "epoch": 0.344695457811128, "grad_norm": 0.3867109694497079, "learning_rate": 1.524057350234359e-05, "loss": 0.1661, "step": 4318 }, { "epoch": 0.3447752853835715, "grad_norm": 0.32559387264996564, "learning_rate": 1.5238371334796566e-05, "loss": 0.1683, "step": 4319 }, { "epoch": 0.344855112956015, "grad_norm": 0.4023638080216273, "learning_rate": 1.5236168817085153e-05, "loss": 0.1672, "step": 4320 }, { "epoch": 0.3449349405284585, "grad_norm": 0.31947063022975214, "learning_rate": 1.5233965949356577e-05, "loss": 0.1663, "step": 4321 }, { "epoch": 0.34501476810090204, "grad_norm": 0.3157073084240362, "learning_rate": 1.5231762731758087e-05, "loss": 0.157, "step": 4322 }, { "epoch": 0.34509459567334555, "grad_norm": 0.35071862780639423, "learning_rate": 1.5229559164436963e-05, "loss": 0.1782, "step": 4323 }, { "epoch": 0.3451744232457891, "grad_norm": 0.3158238228599118, "learning_rate": 1.5227355247540503e-05, "loss": 0.176, "step": 4324 }, { "epoch": 0.34525425081823263, "grad_norm": 0.351537398745697, "learning_rate": 1.5225150981216032e-05, "loss": 0.1706, "step": 4325 }, { "epoch": 0.34533407839067615, "grad_norm": 0.34420453854652294, "learning_rate": 1.5222946365610898e-05, "loss": 0.2136, "step": 4326 }, { "epoch": 0.34541390596311966, "grad_norm": 0.362332859468127, "learning_rate": 1.522074140087247e-05, "loss": 0.1717, "step": 4327 }, { "epoch": 0.3454937335355632, "grad_norm": 0.32162418186087555, "learning_rate": 1.5218536087148137e-05, "loss": 0.1933, "step": 4328 }, { "epoch": 0.3455735611080067, "grad_norm": 0.304969722789877, "learning_rate": 1.5216330424585322e-05, "loss": 0.1789, "step": 4329 }, { "epoch": 0.3456533886804502, "grad_norm": 0.3236737769492595, "learning_rate": 1.521412441333146e-05, "loss": 0.2049, "step": 4330 }, { "epoch": 0.34573321625289377, "grad_norm": 0.29302710726924236, "learning_rate": 1.5211918053534014e-05, "loss": 0.1919, "step": 4331 }, { "epoch": 0.3458130438253373, "grad_norm": 0.3247763392447692, "learning_rate": 1.520971134534047e-05, "loss": 0.1564, "step": 4332 }, { "epoch": 0.3458928713977808, "grad_norm": 0.36224137208061397, "learning_rate": 1.5207504288898341e-05, "loss": 0.1564, "step": 4333 }, { "epoch": 0.3459726989702243, "grad_norm": 0.30805827842356626, "learning_rate": 1.5205296884355161e-05, "loss": 0.16, "step": 4334 }, { "epoch": 0.3460525265426678, "grad_norm": 0.3074313384558833, "learning_rate": 1.520308913185848e-05, "loss": 0.1639, "step": 4335 }, { "epoch": 0.34613235411511134, "grad_norm": 0.3101528519697983, "learning_rate": 1.5200881031555887e-05, "loss": 0.2887, "step": 4336 }, { "epoch": 0.34621218168755485, "grad_norm": 0.4215150504302158, "learning_rate": 1.5198672583594976e-05, "loss": 0.1572, "step": 4337 }, { "epoch": 0.3462920092599984, "grad_norm": 0.31435757222857996, "learning_rate": 1.5196463788123374e-05, "loss": 0.1761, "step": 4338 }, { "epoch": 0.34637183683244194, "grad_norm": 0.2811482445619922, "learning_rate": 1.5194254645288736e-05, "loss": 0.1895, "step": 4339 }, { "epoch": 0.34645166440488545, "grad_norm": 0.27062054503543376, "learning_rate": 1.5192045155238728e-05, "loss": 0.1648, "step": 4340 }, { "epoch": 0.34653149197732896, "grad_norm": 0.3020967319889206, "learning_rate": 1.518983531812105e-05, "loss": 0.1524, "step": 4341 }, { "epoch": 0.3466113195497725, "grad_norm": 0.31401173644182867, "learning_rate": 1.5187625134083418e-05, "loss": 0.1702, "step": 4342 }, { "epoch": 0.346691147122216, "grad_norm": 0.26745134951130556, "learning_rate": 1.5185414603273576e-05, "loss": 0.2264, "step": 4343 }, { "epoch": 0.34677097469465956, "grad_norm": 0.3004676268207613, "learning_rate": 1.518320372583929e-05, "loss": 0.2383, "step": 4344 }, { "epoch": 0.3468508022671031, "grad_norm": 0.2845984098253607, "learning_rate": 1.5180992501928349e-05, "loss": 0.2565, "step": 4345 }, { "epoch": 0.3469306298395466, "grad_norm": 0.3050167680596054, "learning_rate": 1.5178780931688559e-05, "loss": 0.1555, "step": 4346 }, { "epoch": 0.3470104574119901, "grad_norm": 0.3135127180136655, "learning_rate": 1.5176569015267759e-05, "loss": 0.161, "step": 4347 }, { "epoch": 0.3470902849844336, "grad_norm": 0.29268507225211265, "learning_rate": 1.5174356752813806e-05, "loss": 0.1869, "step": 4348 }, { "epoch": 0.34717011255687713, "grad_norm": 0.2591216245672409, "learning_rate": 1.5172144144474582e-05, "loss": 0.174, "step": 4349 }, { "epoch": 0.34724994012932064, "grad_norm": 0.2908620353642254, "learning_rate": 1.5169931190397989e-05, "loss": 0.2178, "step": 4350 }, { "epoch": 0.3473297677017642, "grad_norm": 0.36569184145232003, "learning_rate": 1.5167717890731957e-05, "loss": 0.2055, "step": 4351 }, { "epoch": 0.3474095952742077, "grad_norm": 0.28905049128498733, "learning_rate": 1.5165504245624433e-05, "loss": 0.1801, "step": 4352 }, { "epoch": 0.34748942284665124, "grad_norm": 0.29807572607306826, "learning_rate": 1.5163290255223396e-05, "loss": 0.1444, "step": 4353 }, { "epoch": 0.34756925041909476, "grad_norm": 0.3260180000709404, "learning_rate": 1.5161075919676834e-05, "loss": 0.1808, "step": 4354 }, { "epoch": 0.34764907799153827, "grad_norm": 0.34632741437047976, "learning_rate": 1.5158861239132776e-05, "loss": 0.1953, "step": 4355 }, { "epoch": 0.3477289055639818, "grad_norm": 0.290742048256356, "learning_rate": 1.5156646213739254e-05, "loss": 0.2023, "step": 4356 }, { "epoch": 0.3478087331364253, "grad_norm": 0.3163638758736078, "learning_rate": 1.5154430843644343e-05, "loss": 0.1721, "step": 4357 }, { "epoch": 0.34788856070886887, "grad_norm": 0.30041405597172677, "learning_rate": 1.5152215128996131e-05, "loss": 0.2122, "step": 4358 }, { "epoch": 0.3479683882813124, "grad_norm": 0.27369183887917037, "learning_rate": 1.5149999069942722e-05, "loss": 0.2078, "step": 4359 }, { "epoch": 0.3480482158537559, "grad_norm": 0.3092675816746529, "learning_rate": 1.514778266663226e-05, "loss": 0.2218, "step": 4360 }, { "epoch": 0.3481280434261994, "grad_norm": 0.30644334672316237, "learning_rate": 1.5145565919212898e-05, "loss": 0.2161, "step": 4361 }, { "epoch": 0.3482078709986429, "grad_norm": 0.3489709738509544, "learning_rate": 1.5143348827832815e-05, "loss": 0.1854, "step": 4362 }, { "epoch": 0.34828769857108643, "grad_norm": 0.28169135571601417, "learning_rate": 1.5141131392640218e-05, "loss": 0.198, "step": 4363 }, { "epoch": 0.34836752614352995, "grad_norm": 0.271520739929936, "learning_rate": 1.5138913613783337e-05, "loss": 0.1716, "step": 4364 }, { "epoch": 0.3484473537159735, "grad_norm": 0.2771425984024754, "learning_rate": 1.5136695491410413e-05, "loss": 0.1909, "step": 4365 }, { "epoch": 0.34852718128841703, "grad_norm": 0.33876459563708533, "learning_rate": 1.5134477025669724e-05, "loss": 0.1768, "step": 4366 }, { "epoch": 0.34860700886086055, "grad_norm": 0.2698710110125971, "learning_rate": 1.513225821670957e-05, "loss": 0.1341, "step": 4367 }, { "epoch": 0.34868683643330406, "grad_norm": 0.29426995222989816, "learning_rate": 1.513003906467826e-05, "loss": 0.166, "step": 4368 }, { "epoch": 0.3487666640057476, "grad_norm": 0.3360907884297998, "learning_rate": 1.5127819569724143e-05, "loss": 0.1953, "step": 4369 }, { "epoch": 0.3488464915781911, "grad_norm": 0.29643426902453707, "learning_rate": 1.5125599731995575e-05, "loss": 0.2516, "step": 4370 }, { "epoch": 0.34892631915063466, "grad_norm": 0.4108100230734512, "learning_rate": 1.5123379551640955e-05, "loss": 0.1743, "step": 4371 }, { "epoch": 0.34900614672307817, "grad_norm": 0.30966176639741505, "learning_rate": 1.5121159028808685e-05, "loss": 0.1415, "step": 4372 }, { "epoch": 0.3490859742955217, "grad_norm": 0.33403510904825323, "learning_rate": 1.5118938163647203e-05, "loss": 0.1769, "step": 4373 }, { "epoch": 0.3491658018679652, "grad_norm": 0.34669567991770117, "learning_rate": 1.5116716956304957e-05, "loss": 0.1558, "step": 4374 }, { "epoch": 0.3492456294404087, "grad_norm": 0.3434831960406143, "learning_rate": 1.5114495406930439e-05, "loss": 0.1579, "step": 4375 }, { "epoch": 0.3493254570128522, "grad_norm": 0.36994575529872015, "learning_rate": 1.5112273515672136e-05, "loss": 0.2038, "step": 4376 }, { "epoch": 0.34940528458529574, "grad_norm": 0.2948042414196569, "learning_rate": 1.5110051282678587e-05, "loss": 0.1745, "step": 4377 }, { "epoch": 0.3494851121577393, "grad_norm": 0.2806357179086667, "learning_rate": 1.5107828708098327e-05, "loss": 0.2281, "step": 4378 }, { "epoch": 0.3495649397301828, "grad_norm": 0.3281705008497325, "learning_rate": 1.5105605792079932e-05, "loss": 0.1813, "step": 4379 }, { "epoch": 0.34964476730262634, "grad_norm": 0.30275554545364336, "learning_rate": 1.5103382534771996e-05, "loss": 0.1649, "step": 4380 }, { "epoch": 0.34972459487506985, "grad_norm": 0.28489451695803175, "learning_rate": 1.5101158936323134e-05, "loss": 0.1714, "step": 4381 }, { "epoch": 0.34980442244751336, "grad_norm": 0.2803030060648698, "learning_rate": 1.5098934996881985e-05, "loss": 0.1888, "step": 4382 }, { "epoch": 0.3498842500199569, "grad_norm": 0.2735283584090767, "learning_rate": 1.509671071659721e-05, "loss": 0.1776, "step": 4383 }, { "epoch": 0.3499640775924004, "grad_norm": 0.35210162474268186, "learning_rate": 1.5094486095617491e-05, "loss": 0.199, "step": 4384 }, { "epoch": 0.35004390516484396, "grad_norm": 0.27142135227224645, "learning_rate": 1.509226113409154e-05, "loss": 0.2127, "step": 4385 }, { "epoch": 0.3501237327372875, "grad_norm": 0.2947887031916416, "learning_rate": 1.5090035832168087e-05, "loss": 0.2075, "step": 4386 }, { "epoch": 0.350203560309731, "grad_norm": 0.30929363300304824, "learning_rate": 1.5087810189995875e-05, "loss": 0.1576, "step": 4387 }, { "epoch": 0.3502833878821745, "grad_norm": 0.2999646747220328, "learning_rate": 1.5085584207723691e-05, "loss": 0.1381, "step": 4388 }, { "epoch": 0.350363215454618, "grad_norm": 0.3396911049753795, "learning_rate": 1.508335788550033e-05, "loss": 0.1451, "step": 4389 }, { "epoch": 0.35044304302706153, "grad_norm": 0.30428924233866284, "learning_rate": 1.5081131223474608e-05, "loss": 0.2084, "step": 4390 }, { "epoch": 0.35052287059950504, "grad_norm": 0.33537529982787634, "learning_rate": 1.5078904221795374e-05, "loss": 0.2159, "step": 4391 }, { "epoch": 0.3506026981719486, "grad_norm": 0.29897862393128516, "learning_rate": 1.507667688061149e-05, "loss": 0.1563, "step": 4392 }, { "epoch": 0.3506825257443921, "grad_norm": 0.31495685610779534, "learning_rate": 1.5074449200071849e-05, "loss": 0.1777, "step": 4393 }, { "epoch": 0.35076235331683564, "grad_norm": 0.33279209592711534, "learning_rate": 1.507222118032536e-05, "loss": 0.2132, "step": 4394 }, { "epoch": 0.35084218088927915, "grad_norm": 0.3235873030342572, "learning_rate": 1.5069992821520958e-05, "loss": 0.1911, "step": 4395 }, { "epoch": 0.35092200846172267, "grad_norm": 0.38314642382290065, "learning_rate": 1.50677641238076e-05, "loss": 0.2195, "step": 4396 }, { "epoch": 0.3510018360341662, "grad_norm": 0.33950096318806133, "learning_rate": 1.5065535087334266e-05, "loss": 0.1355, "step": 4397 }, { "epoch": 0.35108166360660975, "grad_norm": 0.33822900122408306, "learning_rate": 1.5063305712249957e-05, "loss": 0.2276, "step": 4398 }, { "epoch": 0.35116149117905326, "grad_norm": 0.30395124906219906, "learning_rate": 1.50610759987037e-05, "loss": 0.2546, "step": 4399 }, { "epoch": 0.3512413187514968, "grad_norm": 0.31009997011552876, "learning_rate": 1.505884594684454e-05, "loss": 0.1426, "step": 4400 }, { "epoch": 0.3513211463239403, "grad_norm": 0.36175557433516586, "learning_rate": 1.505661555682155e-05, "loss": 0.1594, "step": 4401 }, { "epoch": 0.3514009738963838, "grad_norm": 0.28824335775075977, "learning_rate": 1.505438482878382e-05, "loss": 0.2541, "step": 4402 }, { "epoch": 0.3514808014688273, "grad_norm": 0.3092823436042206, "learning_rate": 1.5052153762880469e-05, "loss": 0.1591, "step": 4403 }, { "epoch": 0.35156062904127083, "grad_norm": 0.2697687282800857, "learning_rate": 1.5049922359260629e-05, "loss": 0.2039, "step": 4404 }, { "epoch": 0.3516404566137144, "grad_norm": 0.31709838720371436, "learning_rate": 1.5047690618073464e-05, "loss": 0.1565, "step": 4405 }, { "epoch": 0.3517202841861579, "grad_norm": 0.252187686984675, "learning_rate": 1.504545853946816e-05, "loss": 0.2181, "step": 4406 }, { "epoch": 0.35180011175860143, "grad_norm": 0.3166414478891567, "learning_rate": 1.5043226123593914e-05, "loss": 0.1541, "step": 4407 }, { "epoch": 0.35187993933104494, "grad_norm": 0.35843301720345677, "learning_rate": 1.5040993370599965e-05, "loss": 0.1986, "step": 4408 }, { "epoch": 0.35195976690348846, "grad_norm": 0.2997030172315885, "learning_rate": 1.5038760280635556e-05, "loss": 0.1717, "step": 4409 }, { "epoch": 0.35203959447593197, "grad_norm": 0.2475509936368796, "learning_rate": 1.5036526853849967e-05, "loss": 0.1729, "step": 4410 }, { "epoch": 0.3521194220483755, "grad_norm": 0.24619611199072494, "learning_rate": 1.5034293090392487e-05, "loss": 0.2151, "step": 4411 }, { "epoch": 0.35219924962081905, "grad_norm": 0.3671045692631876, "learning_rate": 1.5032058990412433e-05, "loss": 0.1805, "step": 4412 }, { "epoch": 0.35227907719326257, "grad_norm": 0.28006429338233413, "learning_rate": 1.5029824554059156e-05, "loss": 0.1613, "step": 4413 }, { "epoch": 0.3523589047657061, "grad_norm": 0.3009235367359655, "learning_rate": 1.5027589781482007e-05, "loss": 0.2076, "step": 4414 }, { "epoch": 0.3524387323381496, "grad_norm": 0.2818903361145328, "learning_rate": 1.502535467283038e-05, "loss": 0.1868, "step": 4415 }, { "epoch": 0.3525185599105931, "grad_norm": 0.24884324238348118, "learning_rate": 1.502311922825368e-05, "loss": 0.1225, "step": 4416 }, { "epoch": 0.3525983874830366, "grad_norm": 0.2919213125518698, "learning_rate": 1.502088344790134e-05, "loss": 0.1918, "step": 4417 }, { "epoch": 0.35267821505548014, "grad_norm": 0.2665916087070878, "learning_rate": 1.5018647331922809e-05, "loss": 0.1797, "step": 4418 }, { "epoch": 0.3527580426279237, "grad_norm": 0.3000594750040635, "learning_rate": 1.5016410880467567e-05, "loss": 0.208, "step": 4419 }, { "epoch": 0.3528378702003672, "grad_norm": 0.25953023359145744, "learning_rate": 1.5014174093685108e-05, "loss": 0.186, "step": 4420 }, { "epoch": 0.35291769777281073, "grad_norm": 0.2946964122487719, "learning_rate": 1.5011936971724955e-05, "loss": 0.1769, "step": 4421 }, { "epoch": 0.35299752534525425, "grad_norm": 0.2769849138728087, "learning_rate": 1.5009699514736647e-05, "loss": 0.2423, "step": 4422 }, { "epoch": 0.35307735291769776, "grad_norm": 0.2699965512531287, "learning_rate": 1.5007461722869753e-05, "loss": 0.1637, "step": 4423 }, { "epoch": 0.3531571804901413, "grad_norm": 0.274065623870225, "learning_rate": 1.500522359627386e-05, "loss": 0.1453, "step": 4424 }, { "epoch": 0.3532370080625848, "grad_norm": 0.31861877261791044, "learning_rate": 1.5002985135098578e-05, "loss": 0.1786, "step": 4425 }, { "epoch": 0.35331683563502836, "grad_norm": 0.273714206725567, "learning_rate": 1.5000746339493542e-05, "loss": 0.2109, "step": 4426 }, { "epoch": 0.3533966632074719, "grad_norm": 0.29539880332977175, "learning_rate": 1.4998507209608397e-05, "loss": 0.2394, "step": 4427 }, { "epoch": 0.3534764907799154, "grad_norm": 0.3011258147545039, "learning_rate": 1.4996267745592829e-05, "loss": 0.164, "step": 4428 }, { "epoch": 0.3535563183523589, "grad_norm": 0.28887196830069606, "learning_rate": 1.4994027947596534e-05, "loss": 0.1872, "step": 4429 }, { "epoch": 0.3536361459248024, "grad_norm": 0.2906526853396117, "learning_rate": 1.4991787815769236e-05, "loss": 0.1737, "step": 4430 }, { "epoch": 0.35371597349724593, "grad_norm": 0.2678039685459623, "learning_rate": 1.4989547350260676e-05, "loss": 0.1486, "step": 4431 }, { "epoch": 0.3537958010696895, "grad_norm": 0.3446604816893872, "learning_rate": 1.4987306551220622e-05, "loss": 0.2337, "step": 4432 }, { "epoch": 0.353875628642133, "grad_norm": 0.2749667001412713, "learning_rate": 1.498506541879886e-05, "loss": 0.1565, "step": 4433 }, { "epoch": 0.3539554562145765, "grad_norm": 0.3272366892419928, "learning_rate": 1.4982823953145204e-05, "loss": 0.1658, "step": 4434 }, { "epoch": 0.35403528378702004, "grad_norm": 0.302520206061007, "learning_rate": 1.4980582154409486e-05, "loss": 0.2073, "step": 4435 }, { "epoch": 0.35411511135946355, "grad_norm": 0.33000936080934573, "learning_rate": 1.497834002274156e-05, "loss": 0.2313, "step": 4436 }, { "epoch": 0.35419493893190707, "grad_norm": 0.2755755452634546, "learning_rate": 1.4976097558291307e-05, "loss": 0.1976, "step": 4437 }, { "epoch": 0.3542747665043506, "grad_norm": 0.3176780021569522, "learning_rate": 1.4973854761208621e-05, "loss": 0.1688, "step": 4438 }, { "epoch": 0.35435459407679415, "grad_norm": 0.32542439581901417, "learning_rate": 1.4971611631643432e-05, "loss": 0.1898, "step": 4439 }, { "epoch": 0.35443442164923766, "grad_norm": 0.3844619978931664, "learning_rate": 1.4969368169745679e-05, "loss": 0.2257, "step": 4440 }, { "epoch": 0.3545142492216812, "grad_norm": 0.3984369759066753, "learning_rate": 1.4967124375665331e-05, "loss": 0.1795, "step": 4441 }, { "epoch": 0.3545940767941247, "grad_norm": 0.31275895517588415, "learning_rate": 1.4964880249552374e-05, "loss": 0.1733, "step": 4442 }, { "epoch": 0.3546739043665682, "grad_norm": 0.2902386525995359, "learning_rate": 1.4962635791556821e-05, "loss": 0.1919, "step": 4443 }, { "epoch": 0.3547537319390117, "grad_norm": 0.29422446793460844, "learning_rate": 1.4960391001828702e-05, "loss": 0.1761, "step": 4444 }, { "epoch": 0.35483355951145523, "grad_norm": 0.3361738998279161, "learning_rate": 1.4958145880518077e-05, "loss": 0.1916, "step": 4445 }, { "epoch": 0.3549133870838988, "grad_norm": 0.2807545895841595, "learning_rate": 1.495590042777502e-05, "loss": 0.1717, "step": 4446 }, { "epoch": 0.3549932146563423, "grad_norm": 0.28454425681823825, "learning_rate": 1.4953654643749636e-05, "loss": 0.1837, "step": 4447 }, { "epoch": 0.35507304222878583, "grad_norm": 0.2893380897734455, "learning_rate": 1.495140852859204e-05, "loss": 0.1472, "step": 4448 }, { "epoch": 0.35515286980122934, "grad_norm": 0.3275887793438928, "learning_rate": 1.4949162082452378e-05, "loss": 0.1912, "step": 4449 }, { "epoch": 0.35523269737367286, "grad_norm": 0.3156563707216631, "learning_rate": 1.4946915305480818e-05, "loss": 0.2098, "step": 4450 }, { "epoch": 0.35531252494611637, "grad_norm": 0.36279676452826376, "learning_rate": 1.4944668197827545e-05, "loss": 0.1724, "step": 4451 }, { "epoch": 0.3553923525185599, "grad_norm": 0.2574733414569203, "learning_rate": 1.4942420759642773e-05, "loss": 0.1789, "step": 4452 }, { "epoch": 0.35547218009100345, "grad_norm": 0.31815869271039976, "learning_rate": 1.494017299107673e-05, "loss": 0.1821, "step": 4453 }, { "epoch": 0.35555200766344697, "grad_norm": 0.3098794580490473, "learning_rate": 1.4937924892279676e-05, "loss": 0.162, "step": 4454 }, { "epoch": 0.3556318352358905, "grad_norm": 0.29060180920651146, "learning_rate": 1.4935676463401884e-05, "loss": 0.2093, "step": 4455 }, { "epoch": 0.355711662808334, "grad_norm": 0.2773931342876023, "learning_rate": 1.4933427704593651e-05, "loss": 0.1827, "step": 4456 }, { "epoch": 0.3557914903807775, "grad_norm": 0.3801234588315814, "learning_rate": 1.4931178616005302e-05, "loss": 0.2621, "step": 4457 }, { "epoch": 0.355871317953221, "grad_norm": 0.29408907315118343, "learning_rate": 1.4928929197787173e-05, "loss": 0.1556, "step": 4458 }, { "epoch": 0.3559511455256646, "grad_norm": 0.32273461226312095, "learning_rate": 1.4926679450089638e-05, "loss": 0.1745, "step": 4459 }, { "epoch": 0.3560309730981081, "grad_norm": 0.3648656497819162, "learning_rate": 1.4924429373063076e-05, "loss": 0.1626, "step": 4460 }, { "epoch": 0.3561108006705516, "grad_norm": 0.3366881446810674, "learning_rate": 1.4922178966857898e-05, "loss": 0.1555, "step": 4461 }, { "epoch": 0.35619062824299513, "grad_norm": 0.2961112278831011, "learning_rate": 1.4919928231624537e-05, "loss": 0.1813, "step": 4462 }, { "epoch": 0.35627045581543865, "grad_norm": 0.30458103407142845, "learning_rate": 1.4917677167513445e-05, "loss": 0.2415, "step": 4463 }, { "epoch": 0.35635028338788216, "grad_norm": 0.3090081798304149, "learning_rate": 1.4915425774675095e-05, "loss": 0.1741, "step": 4464 }, { "epoch": 0.3564301109603257, "grad_norm": 0.30227196441377424, "learning_rate": 1.4913174053259982e-05, "loss": 0.1944, "step": 4465 }, { "epoch": 0.35650993853276924, "grad_norm": 0.2923381502337098, "learning_rate": 1.4910922003418628e-05, "loss": 0.1462, "step": 4466 }, { "epoch": 0.35658976610521276, "grad_norm": 0.2612597402945211, "learning_rate": 1.4908669625301575e-05, "loss": 0.1601, "step": 4467 }, { "epoch": 0.35666959367765627, "grad_norm": 0.33856898097306926, "learning_rate": 1.4906416919059378e-05, "loss": 0.1623, "step": 4468 }, { "epoch": 0.3567494212500998, "grad_norm": 0.27543223872852196, "learning_rate": 1.4904163884842634e-05, "loss": 0.1538, "step": 4469 }, { "epoch": 0.3568292488225433, "grad_norm": 0.27718007455282156, "learning_rate": 1.4901910522801936e-05, "loss": 0.1975, "step": 4470 }, { "epoch": 0.3569090763949868, "grad_norm": 0.31286295187574176, "learning_rate": 1.489965683308792e-05, "loss": 0.1629, "step": 4471 }, { "epoch": 0.3569889039674303, "grad_norm": 0.32235425264707324, "learning_rate": 1.4897402815851236e-05, "loss": 0.2003, "step": 4472 }, { "epoch": 0.3570687315398739, "grad_norm": 0.30646994781127423, "learning_rate": 1.4895148471242554e-05, "loss": 0.1557, "step": 4473 }, { "epoch": 0.3571485591123174, "grad_norm": 0.30393734805428635, "learning_rate": 1.4892893799412569e-05, "loss": 0.1663, "step": 4474 }, { "epoch": 0.3572283866847609, "grad_norm": 0.28628657803761776, "learning_rate": 1.4890638800511995e-05, "loss": 0.1556, "step": 4475 }, { "epoch": 0.35730821425720444, "grad_norm": 0.2768851786237792, "learning_rate": 1.4888383474691575e-05, "loss": 0.2187, "step": 4476 }, { "epoch": 0.35738804182964795, "grad_norm": 0.3275730929198688, "learning_rate": 1.4886127822102064e-05, "loss": 0.1663, "step": 4477 }, { "epoch": 0.35746786940209146, "grad_norm": 0.3163680098168231, "learning_rate": 1.4883871842894242e-05, "loss": 0.2175, "step": 4478 }, { "epoch": 0.357547696974535, "grad_norm": 0.3056844983428059, "learning_rate": 1.4881615537218918e-05, "loss": 0.1805, "step": 4479 }, { "epoch": 0.35762752454697855, "grad_norm": 0.30209561769654614, "learning_rate": 1.4879358905226913e-05, "loss": 0.1618, "step": 4480 }, { "epoch": 0.35770735211942206, "grad_norm": 0.3312923250584899, "learning_rate": 1.4877101947069074e-05, "loss": 0.1544, "step": 4481 }, { "epoch": 0.3577871796918656, "grad_norm": 0.2852808208311757, "learning_rate": 1.4874844662896271e-05, "loss": 0.2176, "step": 4482 }, { "epoch": 0.3578670072643091, "grad_norm": 0.2934500413733696, "learning_rate": 1.4872587052859396e-05, "loss": 0.1549, "step": 4483 }, { "epoch": 0.3579468348367526, "grad_norm": 0.2897605797024856, "learning_rate": 1.4870329117109357e-05, "loss": 0.1728, "step": 4484 }, { "epoch": 0.3580266624091961, "grad_norm": 0.338190386768392, "learning_rate": 1.4868070855797093e-05, "loss": 0.2093, "step": 4485 }, { "epoch": 0.35810648998163963, "grad_norm": 0.3255639159685175, "learning_rate": 1.4865812269073553e-05, "loss": 0.1709, "step": 4486 }, { "epoch": 0.3581863175540832, "grad_norm": 0.29611779785868175, "learning_rate": 1.4863553357089723e-05, "loss": 0.1776, "step": 4487 }, { "epoch": 0.3582661451265267, "grad_norm": 0.33742991387984017, "learning_rate": 1.4861294119996596e-05, "loss": 0.1529, "step": 4488 }, { "epoch": 0.3583459726989702, "grad_norm": 0.286633297054952, "learning_rate": 1.4859034557945196e-05, "loss": 0.1856, "step": 4489 }, { "epoch": 0.35842580027141374, "grad_norm": 0.3878131351642767, "learning_rate": 1.4856774671086564e-05, "loss": 0.1424, "step": 4490 }, { "epoch": 0.35850562784385726, "grad_norm": 0.29430993870710437, "learning_rate": 1.4854514459571767e-05, "loss": 0.2161, "step": 4491 }, { "epoch": 0.35858545541630077, "grad_norm": 0.32159336461803634, "learning_rate": 1.485225392355189e-05, "loss": 0.1736, "step": 4492 }, { "epoch": 0.35866528298874434, "grad_norm": 0.42174691920633817, "learning_rate": 1.4849993063178039e-05, "loss": 0.1781, "step": 4493 }, { "epoch": 0.35874511056118785, "grad_norm": 0.3009058240222651, "learning_rate": 1.4847731878601348e-05, "loss": 0.163, "step": 4494 }, { "epoch": 0.35882493813363137, "grad_norm": 0.3191712323637005, "learning_rate": 1.4845470369972961e-05, "loss": 0.1724, "step": 4495 }, { "epoch": 0.3589047657060749, "grad_norm": 0.31529193060891725, "learning_rate": 1.484320853744406e-05, "loss": 0.2223, "step": 4496 }, { "epoch": 0.3589845932785184, "grad_norm": 0.39450542965648616, "learning_rate": 1.484094638116583e-05, "loss": 0.219, "step": 4497 }, { "epoch": 0.3590644208509619, "grad_norm": 0.2550026948302681, "learning_rate": 1.4838683901289496e-05, "loss": 0.1588, "step": 4498 }, { "epoch": 0.3591442484234054, "grad_norm": 0.2785719987963909, "learning_rate": 1.4836421097966289e-05, "loss": 0.2065, "step": 4499 }, { "epoch": 0.359224075995849, "grad_norm": 0.3140471339478786, "learning_rate": 1.4834157971347475e-05, "loss": 0.1786, "step": 4500 }, { "epoch": 0.3593039035682925, "grad_norm": 0.2722732299915497, "learning_rate": 1.4831894521584332e-05, "loss": 0.1471, "step": 4501 }, { "epoch": 0.359383731140736, "grad_norm": 0.33673549936951946, "learning_rate": 1.4829630748828157e-05, "loss": 0.1728, "step": 4502 }, { "epoch": 0.35946355871317953, "grad_norm": 0.3841016037458141, "learning_rate": 1.4827366653230284e-05, "loss": 0.2162, "step": 4503 }, { "epoch": 0.35954338628562305, "grad_norm": 0.3588367435681014, "learning_rate": 1.482510223494205e-05, "loss": 0.1885, "step": 4504 }, { "epoch": 0.35962321385806656, "grad_norm": 0.2898436837868622, "learning_rate": 1.482283749411483e-05, "loss": 0.197, "step": 4505 }, { "epoch": 0.3597030414305101, "grad_norm": 0.33090826291070763, "learning_rate": 1.4820572430900006e-05, "loss": 0.1824, "step": 4506 }, { "epoch": 0.35978286900295364, "grad_norm": 0.2801311618870521, "learning_rate": 1.4818307045448994e-05, "loss": 0.1618, "step": 4507 }, { "epoch": 0.35986269657539716, "grad_norm": 0.36408638284564027, "learning_rate": 1.4816041337913227e-05, "loss": 0.1672, "step": 4508 }, { "epoch": 0.35994252414784067, "grad_norm": 0.2811806696040263, "learning_rate": 1.4813775308444155e-05, "loss": 0.1705, "step": 4509 }, { "epoch": 0.3600223517202842, "grad_norm": 0.3050582325192553, "learning_rate": 1.481150895719325e-05, "loss": 0.1759, "step": 4510 }, { "epoch": 0.3601021792927277, "grad_norm": 0.3489316791743539, "learning_rate": 1.4809242284312013e-05, "loss": 0.1994, "step": 4511 }, { "epoch": 0.3601820068651712, "grad_norm": 0.32080059421531676, "learning_rate": 1.4806975289951964e-05, "loss": 0.1368, "step": 4512 }, { "epoch": 0.3602618344376147, "grad_norm": 0.32991036562003645, "learning_rate": 1.4804707974264638e-05, "loss": 0.1695, "step": 4513 }, { "epoch": 0.3603416620100583, "grad_norm": 0.2909090045661054, "learning_rate": 1.4802440337401599e-05, "loss": 0.1292, "step": 4514 }, { "epoch": 0.3604214895825018, "grad_norm": 0.32301088341023904, "learning_rate": 1.4800172379514428e-05, "loss": 0.2116, "step": 4515 }, { "epoch": 0.3605013171549453, "grad_norm": 0.34424363808953856, "learning_rate": 1.4797904100754732e-05, "loss": 0.1679, "step": 4516 }, { "epoch": 0.36058114472738884, "grad_norm": 0.28128321981588855, "learning_rate": 1.479563550127413e-05, "loss": 0.1542, "step": 4517 }, { "epoch": 0.36066097229983235, "grad_norm": 0.3237979519757394, "learning_rate": 1.4793366581224278e-05, "loss": 0.1554, "step": 4518 }, { "epoch": 0.36074079987227586, "grad_norm": 0.27324714230916386, "learning_rate": 1.4791097340756835e-05, "loss": 0.1678, "step": 4519 }, { "epoch": 0.36082062744471943, "grad_norm": 0.27835193322258084, "learning_rate": 1.4788827780023499e-05, "loss": 0.2013, "step": 4520 }, { "epoch": 0.36090045501716295, "grad_norm": 0.2782797196682051, "learning_rate": 1.4786557899175974e-05, "loss": 0.2448, "step": 4521 }, { "epoch": 0.36098028258960646, "grad_norm": 0.29658127543893714, "learning_rate": 1.4784287698365998e-05, "loss": 0.1842, "step": 4522 }, { "epoch": 0.36106011016205, "grad_norm": 0.30593269363564896, "learning_rate": 1.4782017177745322e-05, "loss": 0.2307, "step": 4523 }, { "epoch": 0.3611399377344935, "grad_norm": 0.32949924018850413, "learning_rate": 1.4779746337465723e-05, "loss": 0.2336, "step": 4524 }, { "epoch": 0.361219765306937, "grad_norm": 0.38912707506004335, "learning_rate": 1.4777475177678996e-05, "loss": 0.1733, "step": 4525 }, { "epoch": 0.3612995928793805, "grad_norm": 0.3199869191320582, "learning_rate": 1.4775203698536959e-05, "loss": 0.2062, "step": 4526 }, { "epoch": 0.3613794204518241, "grad_norm": 0.3489701497427355, "learning_rate": 1.4772931900191455e-05, "loss": 0.1703, "step": 4527 }, { "epoch": 0.3614592480242676, "grad_norm": 0.36584065746471545, "learning_rate": 1.4770659782794338e-05, "loss": 0.2345, "step": 4528 }, { "epoch": 0.3615390755967111, "grad_norm": 0.3466828812701533, "learning_rate": 1.4768387346497498e-05, "loss": 0.1693, "step": 4529 }, { "epoch": 0.3616189031691546, "grad_norm": 0.3037986465366727, "learning_rate": 1.4766114591452833e-05, "loss": 0.1888, "step": 4530 }, { "epoch": 0.36169873074159814, "grad_norm": 0.4109549572353088, "learning_rate": 1.476384151781227e-05, "loss": 0.182, "step": 4531 }, { "epoch": 0.36177855831404165, "grad_norm": 0.33302407657820304, "learning_rate": 1.4761568125727752e-05, "loss": 0.174, "step": 4532 }, { "epoch": 0.36185838588648517, "grad_norm": 0.41454963308787773, "learning_rate": 1.4759294415351249e-05, "loss": 0.2471, "step": 4533 }, { "epoch": 0.36193821345892874, "grad_norm": 0.26643486620690904, "learning_rate": 1.4757020386834749e-05, "loss": 0.1927, "step": 4534 }, { "epoch": 0.36201804103137225, "grad_norm": 0.26203762796604263, "learning_rate": 1.4754746040330261e-05, "loss": 0.197, "step": 4535 }, { "epoch": 0.36209786860381576, "grad_norm": 0.3129181319814909, "learning_rate": 1.4752471375989816e-05, "loss": 0.1696, "step": 4536 }, { "epoch": 0.3621776961762593, "grad_norm": 0.30464337527670615, "learning_rate": 1.475019639396547e-05, "loss": 0.212, "step": 4537 }, { "epoch": 0.3622575237487028, "grad_norm": 0.3148526441034792, "learning_rate": 1.4747921094409295e-05, "loss": 0.1865, "step": 4538 }, { "epoch": 0.3623373513211463, "grad_norm": 0.30936113156773043, "learning_rate": 1.4745645477473378e-05, "loss": 0.1705, "step": 4539 }, { "epoch": 0.3624171788935898, "grad_norm": 0.32996188710217655, "learning_rate": 1.4743369543309847e-05, "loss": 0.1983, "step": 4540 }, { "epoch": 0.3624970064660334, "grad_norm": 0.334793797853561, "learning_rate": 1.4741093292070829e-05, "loss": 0.1492, "step": 4541 }, { "epoch": 0.3625768340384769, "grad_norm": 0.3374592399818998, "learning_rate": 1.4738816723908491e-05, "loss": 0.1589, "step": 4542 }, { "epoch": 0.3626566616109204, "grad_norm": 0.28637463750136166, "learning_rate": 1.4736539838975006e-05, "loss": 0.212, "step": 4543 }, { "epoch": 0.36273648918336393, "grad_norm": 0.29761744348046393, "learning_rate": 1.4734262637422578e-05, "loss": 0.2083, "step": 4544 }, { "epoch": 0.36281631675580744, "grad_norm": 0.31040874045783357, "learning_rate": 1.4731985119403429e-05, "loss": 0.1809, "step": 4545 }, { "epoch": 0.36289614432825096, "grad_norm": 0.2743187845098606, "learning_rate": 1.47297072850698e-05, "loss": 0.1837, "step": 4546 }, { "epoch": 0.3629759719006945, "grad_norm": 0.29317884951105155, "learning_rate": 1.4727429134573959e-05, "loss": 0.1803, "step": 4547 }, { "epoch": 0.36305579947313804, "grad_norm": 0.2786832613181278, "learning_rate": 1.4725150668068185e-05, "loss": 0.1725, "step": 4548 }, { "epoch": 0.36313562704558155, "grad_norm": 0.2789712773228895, "learning_rate": 1.472287188570479e-05, "loss": 0.1955, "step": 4549 }, { "epoch": 0.36321545461802507, "grad_norm": 0.26840977704349517, "learning_rate": 1.47205927876361e-05, "loss": 0.173, "step": 4550 }, { "epoch": 0.3632952821904686, "grad_norm": 0.3267116372902149, "learning_rate": 1.4718313374014464e-05, "loss": 0.174, "step": 4551 }, { "epoch": 0.3633751097629121, "grad_norm": 0.33977766261983167, "learning_rate": 1.4716033644992252e-05, "loss": 0.119, "step": 4552 }, { "epoch": 0.3634549373353556, "grad_norm": 0.2949593329606495, "learning_rate": 1.4713753600721852e-05, "loss": 0.1707, "step": 4553 }, { "epoch": 0.3635347649077992, "grad_norm": 0.31887046642671735, "learning_rate": 1.4711473241355678e-05, "loss": 0.146, "step": 4554 }, { "epoch": 0.3636145924802427, "grad_norm": 0.3047809430692343, "learning_rate": 1.4709192567046164e-05, "loss": 0.1648, "step": 4555 }, { "epoch": 0.3636944200526862, "grad_norm": 0.3639573915683323, "learning_rate": 1.4706911577945764e-05, "loss": 0.1892, "step": 4556 }, { "epoch": 0.3637742476251297, "grad_norm": 0.3239383632714432, "learning_rate": 1.4704630274206952e-05, "loss": 0.2004, "step": 4557 }, { "epoch": 0.36385407519757323, "grad_norm": 0.4056814348362775, "learning_rate": 1.4702348655982221e-05, "loss": 0.164, "step": 4558 }, { "epoch": 0.36393390277001675, "grad_norm": 0.32043119561442784, "learning_rate": 1.4700066723424096e-05, "loss": 0.1645, "step": 4559 }, { "epoch": 0.36401373034246026, "grad_norm": 0.3801903119135438, "learning_rate": 1.4697784476685111e-05, "loss": 0.1528, "step": 4560 }, { "epoch": 0.36409355791490383, "grad_norm": 0.36705665206290206, "learning_rate": 1.469550191591782e-05, "loss": 0.1886, "step": 4561 }, { "epoch": 0.36417338548734735, "grad_norm": 0.4091272253024428, "learning_rate": 1.4693219041274813e-05, "loss": 0.1628, "step": 4562 }, { "epoch": 0.36425321305979086, "grad_norm": 0.3331422931436263, "learning_rate": 1.4690935852908684e-05, "loss": 0.1806, "step": 4563 }, { "epoch": 0.3643330406322344, "grad_norm": 0.3464503879132641, "learning_rate": 1.4688652350972057e-05, "loss": 0.1449, "step": 4564 }, { "epoch": 0.3644128682046779, "grad_norm": 0.3119811318469501, "learning_rate": 1.4686368535617574e-05, "loss": 0.2179, "step": 4565 }, { "epoch": 0.3644926957771214, "grad_norm": 0.3175688878239794, "learning_rate": 1.4684084406997903e-05, "loss": 0.1744, "step": 4566 }, { "epoch": 0.3645725233495649, "grad_norm": 0.2833670594777037, "learning_rate": 1.4681799965265726e-05, "loss": 0.2012, "step": 4567 }, { "epoch": 0.3646523509220085, "grad_norm": 0.30125928215285414, "learning_rate": 1.4679515210573748e-05, "loss": 0.2102, "step": 4568 }, { "epoch": 0.364732178494452, "grad_norm": 0.4521102012583061, "learning_rate": 1.4677230143074696e-05, "loss": 0.199, "step": 4569 }, { "epoch": 0.3648120060668955, "grad_norm": 0.31244171629618855, "learning_rate": 1.467494476292132e-05, "loss": 0.1881, "step": 4570 }, { "epoch": 0.364891833639339, "grad_norm": 0.3470038487234408, "learning_rate": 1.4672659070266388e-05, "loss": 0.18, "step": 4571 }, { "epoch": 0.36497166121178254, "grad_norm": 0.2992474195161001, "learning_rate": 1.4670373065262688e-05, "loss": 0.1658, "step": 4572 }, { "epoch": 0.36505148878422605, "grad_norm": 0.3231129443904055, "learning_rate": 1.4668086748063034e-05, "loss": 0.2108, "step": 4573 }, { "epoch": 0.36513131635666957, "grad_norm": 0.4340530790643972, "learning_rate": 1.4665800118820248e-05, "loss": 0.1603, "step": 4574 }, { "epoch": 0.36521114392911314, "grad_norm": 0.2854310308860778, "learning_rate": 1.4663513177687197e-05, "loss": 0.1577, "step": 4575 }, { "epoch": 0.36529097150155665, "grad_norm": 0.33523566273459887, "learning_rate": 1.466122592481674e-05, "loss": 0.2068, "step": 4576 }, { "epoch": 0.36537079907400016, "grad_norm": 0.2923528153455782, "learning_rate": 1.4658938360361777e-05, "loss": 0.1771, "step": 4577 }, { "epoch": 0.3654506266464437, "grad_norm": 0.2706279449075761, "learning_rate": 1.465665048447522e-05, "loss": 0.1547, "step": 4578 }, { "epoch": 0.3655304542188872, "grad_norm": 0.38993014733700615, "learning_rate": 1.4654362297310012e-05, "loss": 0.1853, "step": 4579 }, { "epoch": 0.3656102817913307, "grad_norm": 0.2874808337317321, "learning_rate": 1.4652073799019099e-05, "loss": 0.1511, "step": 4580 }, { "epoch": 0.3656901093637743, "grad_norm": 0.2906365014483851, "learning_rate": 1.4649784989755467e-05, "loss": 0.1784, "step": 4581 }, { "epoch": 0.3657699369362178, "grad_norm": 0.2892349900627248, "learning_rate": 1.4647495869672106e-05, "loss": 0.2351, "step": 4582 }, { "epoch": 0.3658497645086613, "grad_norm": 0.25846142753046314, "learning_rate": 1.464520643892204e-05, "loss": 0.1574, "step": 4583 }, { "epoch": 0.3659295920811048, "grad_norm": 0.28321178523307805, "learning_rate": 1.4642916697658305e-05, "loss": 0.1631, "step": 4584 }, { "epoch": 0.36600941965354833, "grad_norm": 0.2772610607276121, "learning_rate": 1.4640626646033961e-05, "loss": 0.1668, "step": 4585 }, { "epoch": 0.36608924722599184, "grad_norm": 0.31689306283299307, "learning_rate": 1.4638336284202095e-05, "loss": 0.2049, "step": 4586 }, { "epoch": 0.36616907479843536, "grad_norm": 0.2922974777693449, "learning_rate": 1.4636045612315801e-05, "loss": 0.1478, "step": 4587 }, { "epoch": 0.3662489023708789, "grad_norm": 0.3536104593549599, "learning_rate": 1.4633754630528207e-05, "loss": 0.1541, "step": 4588 }, { "epoch": 0.36632872994332244, "grad_norm": 0.3072138626012962, "learning_rate": 1.4631463338992453e-05, "loss": 0.1632, "step": 4589 }, { "epoch": 0.36640855751576595, "grad_norm": 0.27019696136474186, "learning_rate": 1.4629171737861704e-05, "loss": 0.1696, "step": 4590 }, { "epoch": 0.36648838508820947, "grad_norm": 0.3152721068052027, "learning_rate": 1.4626879827289143e-05, "loss": 0.2071, "step": 4591 }, { "epoch": 0.366568212660653, "grad_norm": 0.367958663225642, "learning_rate": 1.4624587607427975e-05, "loss": 0.2129, "step": 4592 }, { "epoch": 0.3666480402330965, "grad_norm": 0.30171587403466904, "learning_rate": 1.462229507843143e-05, "loss": 0.1982, "step": 4593 }, { "epoch": 0.36672786780554, "grad_norm": 0.2976033212078288, "learning_rate": 1.4620002240452746e-05, "loss": 0.2193, "step": 4594 }, { "epoch": 0.3668076953779836, "grad_norm": 0.29249692023253293, "learning_rate": 1.46177090936452e-05, "loss": 0.1374, "step": 4595 }, { "epoch": 0.3668875229504271, "grad_norm": 0.32591963876810115, "learning_rate": 1.4615415638162073e-05, "loss": 0.212, "step": 4596 }, { "epoch": 0.3669673505228706, "grad_norm": 0.3015341142480806, "learning_rate": 1.4613121874156682e-05, "loss": 0.2009, "step": 4597 }, { "epoch": 0.3670471780953141, "grad_norm": 0.29351702683682057, "learning_rate": 1.4610827801782343e-05, "loss": 0.1969, "step": 4598 }, { "epoch": 0.36712700566775763, "grad_norm": 0.3758809145153848, "learning_rate": 1.4608533421192415e-05, "loss": 0.1667, "step": 4599 }, { "epoch": 0.36720683324020115, "grad_norm": 0.3334528821871453, "learning_rate": 1.4606238732540263e-05, "loss": 0.1973, "step": 4600 }, { "epoch": 0.36728666081264466, "grad_norm": 0.30952694484252585, "learning_rate": 1.4603943735979283e-05, "loss": 0.129, "step": 4601 }, { "epoch": 0.36736648838508823, "grad_norm": 0.3145700395288467, "learning_rate": 1.4601648431662883e-05, "loss": 0.1505, "step": 4602 }, { "epoch": 0.36744631595753174, "grad_norm": 0.30623865706803427, "learning_rate": 1.4599352819744497e-05, "loss": 0.1716, "step": 4603 }, { "epoch": 0.36752614352997526, "grad_norm": 0.32067691779635477, "learning_rate": 1.4597056900377577e-05, "loss": 0.167, "step": 4604 }, { "epoch": 0.36760597110241877, "grad_norm": 0.3268850259832719, "learning_rate": 1.4594760673715593e-05, "loss": 0.1818, "step": 4605 }, { "epoch": 0.3676857986748623, "grad_norm": 0.3372799134249487, "learning_rate": 1.4592464139912045e-05, "loss": 0.1677, "step": 4606 }, { "epoch": 0.3677656262473058, "grad_norm": 0.3713090957132281, "learning_rate": 1.4590167299120442e-05, "loss": 0.1692, "step": 4607 }, { "epoch": 0.36784545381974937, "grad_norm": 0.2947421755444167, "learning_rate": 1.4587870151494319e-05, "loss": 0.1712, "step": 4608 }, { "epoch": 0.3679252813921929, "grad_norm": 0.32044151551027783, "learning_rate": 1.4585572697187233e-05, "loss": 0.1814, "step": 4609 }, { "epoch": 0.3680051089646364, "grad_norm": 0.28710076399278694, "learning_rate": 1.458327493635276e-05, "loss": 0.1678, "step": 4610 }, { "epoch": 0.3680849365370799, "grad_norm": 0.33358430341123474, "learning_rate": 1.4580976869144494e-05, "loss": 0.1846, "step": 4611 }, { "epoch": 0.3681647641095234, "grad_norm": 0.34555469536289324, "learning_rate": 1.4578678495716059e-05, "loss": 0.1804, "step": 4612 }, { "epoch": 0.36824459168196694, "grad_norm": 0.31390450355728855, "learning_rate": 1.4576379816221078e-05, "loss": 0.1734, "step": 4613 }, { "epoch": 0.36832441925441045, "grad_norm": 0.33587151252501724, "learning_rate": 1.4574080830813222e-05, "loss": 0.175, "step": 4614 }, { "epoch": 0.368404246826854, "grad_norm": 0.3139183408160783, "learning_rate": 1.457178153964616e-05, "loss": 0.1675, "step": 4615 }, { "epoch": 0.36848407439929753, "grad_norm": 0.42062928106935354, "learning_rate": 1.4569481942873598e-05, "loss": 0.1948, "step": 4616 }, { "epoch": 0.36856390197174105, "grad_norm": 0.2687402504075922, "learning_rate": 1.4567182040649251e-05, "loss": 0.1941, "step": 4617 }, { "epoch": 0.36864372954418456, "grad_norm": 0.37486329530558604, "learning_rate": 1.4564881833126858e-05, "loss": 0.1607, "step": 4618 }, { "epoch": 0.3687235571166281, "grad_norm": 0.304089792228035, "learning_rate": 1.4562581320460182e-05, "loss": 0.2073, "step": 4619 }, { "epoch": 0.3688033846890716, "grad_norm": 0.32106943262935295, "learning_rate": 1.4560280502803e-05, "loss": 0.1956, "step": 4620 }, { "epoch": 0.3688832122615151, "grad_norm": 0.27037399236577464, "learning_rate": 1.4557979380309112e-05, "loss": 0.2193, "step": 4621 }, { "epoch": 0.3689630398339587, "grad_norm": 0.28320647373769453, "learning_rate": 1.4555677953132339e-05, "loss": 0.2074, "step": 4622 }, { "epoch": 0.3690428674064022, "grad_norm": 0.3152782125011384, "learning_rate": 1.4553376221426528e-05, "loss": 0.1594, "step": 4623 }, { "epoch": 0.3691226949788457, "grad_norm": 0.28340906016401735, "learning_rate": 1.4551074185345531e-05, "loss": 0.1677, "step": 4624 }, { "epoch": 0.3692025225512892, "grad_norm": 0.3097806331479169, "learning_rate": 1.4548771845043237e-05, "loss": 0.1766, "step": 4625 }, { "epoch": 0.3692823501237327, "grad_norm": 0.3171596053284124, "learning_rate": 1.4546469200673548e-05, "loss": 0.2294, "step": 4626 }, { "epoch": 0.36936217769617624, "grad_norm": 0.3027366462155515, "learning_rate": 1.4544166252390386e-05, "loss": 0.193, "step": 4627 }, { "epoch": 0.36944200526861976, "grad_norm": 0.30262540111631575, "learning_rate": 1.4541863000347691e-05, "loss": 0.1961, "step": 4628 }, { "epoch": 0.3695218328410633, "grad_norm": 0.30506123324196915, "learning_rate": 1.453955944469943e-05, "loss": 0.167, "step": 4629 }, { "epoch": 0.36960166041350684, "grad_norm": 0.32600626838214053, "learning_rate": 1.4537255585599583e-05, "loss": 0.2131, "step": 4630 }, { "epoch": 0.36968148798595035, "grad_norm": 0.30147235864573485, "learning_rate": 1.4534951423202155e-05, "loss": 0.165, "step": 4631 }, { "epoch": 0.36976131555839387, "grad_norm": 0.2784106812197493, "learning_rate": 1.4532646957661176e-05, "loss": 0.1671, "step": 4632 }, { "epoch": 0.3698411431308374, "grad_norm": 0.3674274796975513, "learning_rate": 1.453034218913068e-05, "loss": 0.2018, "step": 4633 }, { "epoch": 0.3699209707032809, "grad_norm": 0.3424648056827262, "learning_rate": 1.4528037117764744e-05, "loss": 0.1945, "step": 4634 }, { "epoch": 0.37000079827572446, "grad_norm": 0.28846993296867013, "learning_rate": 1.4525731743717439e-05, "loss": 0.2249, "step": 4635 }, { "epoch": 0.370080625848168, "grad_norm": 0.2569161181413616, "learning_rate": 1.4523426067142882e-05, "loss": 0.2226, "step": 4636 }, { "epoch": 0.3701604534206115, "grad_norm": 0.2982070913181193, "learning_rate": 1.452112008819519e-05, "loss": 0.1777, "step": 4637 }, { "epoch": 0.370240280993055, "grad_norm": 0.38446492789510855, "learning_rate": 1.4518813807028514e-05, "loss": 0.1771, "step": 4638 }, { "epoch": 0.3703201085654985, "grad_norm": 0.3009999930936285, "learning_rate": 1.4516507223797018e-05, "loss": 0.2035, "step": 4639 }, { "epoch": 0.37039993613794203, "grad_norm": 0.28625785405173804, "learning_rate": 1.4514200338654884e-05, "loss": 0.1812, "step": 4640 }, { "epoch": 0.37047976371038555, "grad_norm": 0.3568391476330166, "learning_rate": 1.4511893151756327e-05, "loss": 0.1526, "step": 4641 }, { "epoch": 0.3705595912828291, "grad_norm": 0.37655781294875496, "learning_rate": 1.4509585663255568e-05, "loss": 0.192, "step": 4642 }, { "epoch": 0.37063941885527263, "grad_norm": 0.3277415482545615, "learning_rate": 1.4507277873306853e-05, "loss": 0.1729, "step": 4643 }, { "epoch": 0.37071924642771614, "grad_norm": 0.268058431359764, "learning_rate": 1.4504969782064446e-05, "loss": 0.1906, "step": 4644 }, { "epoch": 0.37079907400015966, "grad_norm": 0.30418554997076525, "learning_rate": 1.4502661389682641e-05, "loss": 0.1919, "step": 4645 }, { "epoch": 0.37087890157260317, "grad_norm": 0.3481471724322032, "learning_rate": 1.450035269631574e-05, "loss": 0.1668, "step": 4646 }, { "epoch": 0.3709587291450467, "grad_norm": 0.3396240727353597, "learning_rate": 1.4498043702118072e-05, "loss": 0.184, "step": 4647 }, { "epoch": 0.3710385567174902, "grad_norm": 0.2692548980779335, "learning_rate": 1.449573440724398e-05, "loss": 0.1806, "step": 4648 }, { "epoch": 0.37111838428993377, "grad_norm": 0.3031665303728966, "learning_rate": 1.4493424811847839e-05, "loss": 0.1661, "step": 4649 }, { "epoch": 0.3711982118623773, "grad_norm": 0.2916720894128413, "learning_rate": 1.4491114916084028e-05, "loss": 0.1655, "step": 4650 }, { "epoch": 0.3712780394348208, "grad_norm": 0.32054274323653675, "learning_rate": 1.448880472010696e-05, "loss": 0.1588, "step": 4651 }, { "epoch": 0.3713578670072643, "grad_norm": 0.2866806163239583, "learning_rate": 1.448649422407106e-05, "loss": 0.2407, "step": 4652 }, { "epoch": 0.3714376945797078, "grad_norm": 0.3134183512775954, "learning_rate": 1.4484183428130773e-05, "loss": 0.1876, "step": 4653 }, { "epoch": 0.37151752215215134, "grad_norm": 0.2868185060707078, "learning_rate": 1.4481872332440573e-05, "loss": 0.2118, "step": 4654 }, { "epoch": 0.37159734972459485, "grad_norm": 0.30306825641055607, "learning_rate": 1.4479560937154944e-05, "loss": 0.1521, "step": 4655 }, { "epoch": 0.3716771772970384, "grad_norm": 0.30300322820940984, "learning_rate": 1.4477249242428395e-05, "loss": 0.2043, "step": 4656 }, { "epoch": 0.37175700486948193, "grad_norm": 0.3625166472784607, "learning_rate": 1.4474937248415453e-05, "loss": 0.1979, "step": 4657 }, { "epoch": 0.37183683244192545, "grad_norm": 0.30553000144136994, "learning_rate": 1.4472624955270663e-05, "loss": 0.1536, "step": 4658 }, { "epoch": 0.37191666001436896, "grad_norm": 0.3440941808262313, "learning_rate": 1.4470312363148594e-05, "loss": 0.1804, "step": 4659 }, { "epoch": 0.3719964875868125, "grad_norm": 0.3964822027683718, "learning_rate": 1.4467999472203836e-05, "loss": 0.1601, "step": 4660 }, { "epoch": 0.372076315159256, "grad_norm": 0.33950044735364615, "learning_rate": 1.4465686282590994e-05, "loss": 0.2186, "step": 4661 }, { "epoch": 0.3721561427316995, "grad_norm": 0.2869187093609732, "learning_rate": 1.4463372794464696e-05, "loss": 0.1867, "step": 4662 }, { "epoch": 0.37223597030414307, "grad_norm": 0.32799860373984463, "learning_rate": 1.4461059007979591e-05, "loss": 0.1649, "step": 4663 }, { "epoch": 0.3723157978765866, "grad_norm": 0.31325318635103694, "learning_rate": 1.4458744923290349e-05, "loss": 0.2498, "step": 4664 }, { "epoch": 0.3723956254490301, "grad_norm": 0.2939271329941278, "learning_rate": 1.445643054055165e-05, "loss": 0.2098, "step": 4665 }, { "epoch": 0.3724754530214736, "grad_norm": 0.35289195281566194, "learning_rate": 1.4454115859918206e-05, "loss": 0.2096, "step": 4666 }, { "epoch": 0.3725552805939171, "grad_norm": 0.3965841263989313, "learning_rate": 1.4451800881544743e-05, "loss": 0.2141, "step": 4667 }, { "epoch": 0.37263510816636064, "grad_norm": 0.3031426977493011, "learning_rate": 1.444948560558601e-05, "loss": 0.2632, "step": 4668 }, { "epoch": 0.3727149357388042, "grad_norm": 0.28750601309654916, "learning_rate": 1.4447170032196773e-05, "loss": 0.2017, "step": 4669 }, { "epoch": 0.3727947633112477, "grad_norm": 0.3016176643236657, "learning_rate": 1.4444854161531816e-05, "loss": 0.1627, "step": 4670 }, { "epoch": 0.37287459088369124, "grad_norm": 0.3134430786292091, "learning_rate": 1.4442537993745953e-05, "loss": 0.1688, "step": 4671 }, { "epoch": 0.37295441845613475, "grad_norm": 0.2990285813543513, "learning_rate": 1.4440221528994002e-05, "loss": 0.2199, "step": 4672 }, { "epoch": 0.37303424602857826, "grad_norm": 0.28721457306880105, "learning_rate": 1.4437904767430814e-05, "loss": 0.19, "step": 4673 }, { "epoch": 0.3731140736010218, "grad_norm": 0.28955106959876253, "learning_rate": 1.4435587709211258e-05, "loss": 0.177, "step": 4674 }, { "epoch": 0.3731939011734653, "grad_norm": 0.30704524450496246, "learning_rate": 1.4433270354490215e-05, "loss": 0.1858, "step": 4675 }, { "epoch": 0.37327372874590886, "grad_norm": 0.3343586443033813, "learning_rate": 1.4430952703422594e-05, "loss": 0.1591, "step": 4676 }, { "epoch": 0.3733535563183524, "grad_norm": 0.26173107409126467, "learning_rate": 1.4428634756163318e-05, "loss": 0.1896, "step": 4677 }, { "epoch": 0.3734333838907959, "grad_norm": 0.3147493381326953, "learning_rate": 1.4426316512867338e-05, "loss": 0.1854, "step": 4678 }, { "epoch": 0.3735132114632394, "grad_norm": 0.2788838720657147, "learning_rate": 1.4423997973689614e-05, "loss": 0.156, "step": 4679 }, { "epoch": 0.3735930390356829, "grad_norm": 0.33648728179615717, "learning_rate": 1.4421679138785134e-05, "loss": 0.1642, "step": 4680 }, { "epoch": 0.37367286660812643, "grad_norm": 0.320158878040878, "learning_rate": 1.4419360008308899e-05, "loss": 0.1503, "step": 4681 }, { "epoch": 0.37375269418056994, "grad_norm": 0.27281637059072905, "learning_rate": 1.441704058241594e-05, "loss": 0.1358, "step": 4682 }, { "epoch": 0.3738325217530135, "grad_norm": 0.2866584794490501, "learning_rate": 1.4414720861261296e-05, "loss": 0.1958, "step": 4683 }, { "epoch": 0.373912349325457, "grad_norm": 0.33924347881284206, "learning_rate": 1.4412400845000032e-05, "loss": 0.1719, "step": 4684 }, { "epoch": 0.37399217689790054, "grad_norm": 0.2899041432282466, "learning_rate": 1.4410080533787236e-05, "loss": 0.2122, "step": 4685 }, { "epoch": 0.37407200447034405, "grad_norm": 0.3288918665917085, "learning_rate": 1.4407759927778009e-05, "loss": 0.1702, "step": 4686 }, { "epoch": 0.37415183204278757, "grad_norm": 0.3297260943707974, "learning_rate": 1.4405439027127475e-05, "loss": 0.2016, "step": 4687 }, { "epoch": 0.3742316596152311, "grad_norm": 0.27463035071409947, "learning_rate": 1.4403117831990775e-05, "loss": 0.1886, "step": 4688 }, { "epoch": 0.3743114871876746, "grad_norm": 0.27834078859096373, "learning_rate": 1.4400796342523072e-05, "loss": 0.196, "step": 4689 }, { "epoch": 0.37439131476011817, "grad_norm": 0.2714553826734505, "learning_rate": 1.4398474558879549e-05, "loss": 0.1868, "step": 4690 }, { "epoch": 0.3744711423325617, "grad_norm": 0.2920449751816478, "learning_rate": 1.4396152481215411e-05, "loss": 0.1707, "step": 4691 }, { "epoch": 0.3745509699050052, "grad_norm": 0.30765102367300745, "learning_rate": 1.4393830109685874e-05, "loss": 0.1619, "step": 4692 }, { "epoch": 0.3746307974774487, "grad_norm": 0.2677975386855416, "learning_rate": 1.4391507444446184e-05, "loss": 0.2348, "step": 4693 }, { "epoch": 0.3747106250498922, "grad_norm": 0.2730013609773868, "learning_rate": 1.4389184485651601e-05, "loss": 0.1791, "step": 4694 }, { "epoch": 0.37479045262233573, "grad_norm": 0.30993669655411327, "learning_rate": 1.4386861233457403e-05, "loss": 0.2105, "step": 4695 }, { "epoch": 0.3748702801947793, "grad_norm": 0.3319795276777253, "learning_rate": 1.4384537688018894e-05, "loss": 0.1769, "step": 4696 }, { "epoch": 0.3749501077672228, "grad_norm": 0.24045890746021506, "learning_rate": 1.438221384949139e-05, "loss": 0.163, "step": 4697 }, { "epoch": 0.37502993533966633, "grad_norm": 0.2715714481317193, "learning_rate": 1.4379889718030235e-05, "loss": 0.1726, "step": 4698 }, { "epoch": 0.37510976291210985, "grad_norm": 0.3346018647398466, "learning_rate": 1.4377565293790783e-05, "loss": 0.1884, "step": 4699 }, { "epoch": 0.37518959048455336, "grad_norm": 0.3156754832626469, "learning_rate": 1.4375240576928418e-05, "loss": 0.153, "step": 4700 }, { "epoch": 0.3752694180569969, "grad_norm": 0.272368292262366, "learning_rate": 1.4372915567598533e-05, "loss": 0.1924, "step": 4701 }, { "epoch": 0.3753492456294404, "grad_norm": 0.28971965732281635, "learning_rate": 1.437059026595655e-05, "loss": 0.1736, "step": 4702 }, { "epoch": 0.37542907320188396, "grad_norm": 0.30015742249985067, "learning_rate": 1.43682646721579e-05, "loss": 0.165, "step": 4703 }, { "epoch": 0.37550890077432747, "grad_norm": 0.369175192062391, "learning_rate": 1.4365938786358047e-05, "loss": 0.1916, "step": 4704 }, { "epoch": 0.375588728346771, "grad_norm": 0.3012318078302462, "learning_rate": 1.4363612608712462e-05, "loss": 0.1274, "step": 4705 }, { "epoch": 0.3756685559192145, "grad_norm": 0.2966824917485423, "learning_rate": 1.4361286139376645e-05, "loss": 0.2105, "step": 4706 }, { "epoch": 0.375748383491658, "grad_norm": 0.33604117976513326, "learning_rate": 1.4358959378506105e-05, "loss": 0.1736, "step": 4707 }, { "epoch": 0.3758282110641015, "grad_norm": 0.32976935082701736, "learning_rate": 1.4356632326256383e-05, "loss": 0.2168, "step": 4708 }, { "epoch": 0.37590803863654504, "grad_norm": 0.3223495524358941, "learning_rate": 1.4354304982783033e-05, "loss": 0.1695, "step": 4709 }, { "epoch": 0.3759878662089886, "grad_norm": 0.3413647421767609, "learning_rate": 1.4351977348241626e-05, "loss": 0.1974, "step": 4710 }, { "epoch": 0.3760676937814321, "grad_norm": 0.35220904133866077, "learning_rate": 1.4349649422787754e-05, "loss": 0.2219, "step": 4711 }, { "epoch": 0.37614752135387564, "grad_norm": 0.35031742049619896, "learning_rate": 1.4347321206577031e-05, "loss": 0.1849, "step": 4712 }, { "epoch": 0.37622734892631915, "grad_norm": 0.2780298933425769, "learning_rate": 1.4344992699765092e-05, "loss": 0.1782, "step": 4713 }, { "epoch": 0.37630717649876266, "grad_norm": 0.3587788170669609, "learning_rate": 1.4342663902507586e-05, "loss": 0.2166, "step": 4714 }, { "epoch": 0.3763870040712062, "grad_norm": 0.27910295796056195, "learning_rate": 1.4340334814960182e-05, "loss": 0.2104, "step": 4715 }, { "epoch": 0.3764668316436497, "grad_norm": 0.31796866597166756, "learning_rate": 1.4338005437278575e-05, "loss": 0.2164, "step": 4716 }, { "epoch": 0.37654665921609326, "grad_norm": 0.3623144458819012, "learning_rate": 1.4335675769618468e-05, "loss": 0.1841, "step": 4717 }, { "epoch": 0.3766264867885368, "grad_norm": 0.2864109560101192, "learning_rate": 1.4333345812135597e-05, "loss": 0.1888, "step": 4718 }, { "epoch": 0.3767063143609803, "grad_norm": 0.3185919555676117, "learning_rate": 1.4331015564985706e-05, "loss": 0.1454, "step": 4719 }, { "epoch": 0.3767861419334238, "grad_norm": 0.28632584074976075, "learning_rate": 1.4328685028324564e-05, "loss": 0.188, "step": 4720 }, { "epoch": 0.3768659695058673, "grad_norm": 0.29714602293224934, "learning_rate": 1.4326354202307958e-05, "loss": 0.1935, "step": 4721 }, { "epoch": 0.37694579707831083, "grad_norm": 0.46036926889228674, "learning_rate": 1.4324023087091696e-05, "loss": 0.1889, "step": 4722 }, { "epoch": 0.37702562465075434, "grad_norm": 0.3074220729532615, "learning_rate": 1.43216916828316e-05, "loss": 0.1755, "step": 4723 }, { "epoch": 0.3771054522231979, "grad_norm": 0.30530050882271187, "learning_rate": 1.4319359989683523e-05, "loss": 0.2, "step": 4724 }, { "epoch": 0.3771852797956414, "grad_norm": 0.2919852106140234, "learning_rate": 1.431702800780332e-05, "loss": 0.2017, "step": 4725 }, { "epoch": 0.37726510736808494, "grad_norm": 0.4507243983361485, "learning_rate": 1.4314695737346882e-05, "loss": 0.1744, "step": 4726 }, { "epoch": 0.37734493494052845, "grad_norm": 0.31082230873074124, "learning_rate": 1.4312363178470104e-05, "loss": 0.1903, "step": 4727 }, { "epoch": 0.37742476251297197, "grad_norm": 0.2820510262935933, "learning_rate": 1.4310030331328917e-05, "loss": 0.1531, "step": 4728 }, { "epoch": 0.3775045900854155, "grad_norm": 0.31619025795578, "learning_rate": 1.4307697196079258e-05, "loss": 0.2295, "step": 4729 }, { "epoch": 0.37758441765785905, "grad_norm": 0.26807153203968265, "learning_rate": 1.4305363772877087e-05, "loss": 0.186, "step": 4730 }, { "epoch": 0.37766424523030256, "grad_norm": 0.40310504823678933, "learning_rate": 1.4303030061878392e-05, "loss": 0.1936, "step": 4731 }, { "epoch": 0.3777440728027461, "grad_norm": 0.3706972896980962, "learning_rate": 1.430069606323916e-05, "loss": 0.1857, "step": 4732 }, { "epoch": 0.3778239003751896, "grad_norm": 0.3359705836567999, "learning_rate": 1.429836177711542e-05, "loss": 0.188, "step": 4733 }, { "epoch": 0.3779037279476331, "grad_norm": 0.32315656998088416, "learning_rate": 1.4296027203663203e-05, "loss": 0.2524, "step": 4734 }, { "epoch": 0.3779835555200766, "grad_norm": 0.33306960173250577, "learning_rate": 1.429369234303857e-05, "loss": 0.1702, "step": 4735 }, { "epoch": 0.37806338309252013, "grad_norm": 0.2931738656232267, "learning_rate": 1.4291357195397595e-05, "loss": 0.1932, "step": 4736 }, { "epoch": 0.3781432106649637, "grad_norm": 0.2899141206096805, "learning_rate": 1.4289021760896374e-05, "loss": 0.1743, "step": 4737 }, { "epoch": 0.3782230382374072, "grad_norm": 0.28443460373616714, "learning_rate": 1.4286686039691022e-05, "loss": 0.2098, "step": 4738 }, { "epoch": 0.37830286580985073, "grad_norm": 0.31694103649490346, "learning_rate": 1.4284350031937672e-05, "loss": 0.2298, "step": 4739 }, { "epoch": 0.37838269338229424, "grad_norm": 0.3426905328773612, "learning_rate": 1.4282013737792479e-05, "loss": 0.1843, "step": 4740 }, { "epoch": 0.37846252095473776, "grad_norm": 0.33889529455172557, "learning_rate": 1.427967715741161e-05, "loss": 0.1807, "step": 4741 }, { "epoch": 0.37854234852718127, "grad_norm": 0.3681225230619729, "learning_rate": 1.4277340290951264e-05, "loss": 0.1937, "step": 4742 }, { "epoch": 0.3786221760996248, "grad_norm": 0.2826556403569001, "learning_rate": 1.4275003138567644e-05, "loss": 0.154, "step": 4743 }, { "epoch": 0.37870200367206835, "grad_norm": 0.3208260030773662, "learning_rate": 1.4272665700416983e-05, "loss": 0.2506, "step": 4744 }, { "epoch": 0.37878183124451187, "grad_norm": 0.3248534907052872, "learning_rate": 1.427032797665553e-05, "loss": 0.1787, "step": 4745 }, { "epoch": 0.3788616588169554, "grad_norm": 0.29677916418421124, "learning_rate": 1.4267989967439551e-05, "loss": 0.2196, "step": 4746 }, { "epoch": 0.3789414863893989, "grad_norm": 0.314877047317097, "learning_rate": 1.4265651672925329e-05, "loss": 0.1507, "step": 4747 }, { "epoch": 0.3790213139618424, "grad_norm": 0.3049984517232845, "learning_rate": 1.4263313093269178e-05, "loss": 0.1946, "step": 4748 }, { "epoch": 0.3791011415342859, "grad_norm": 0.3461327432065617, "learning_rate": 1.4260974228627415e-05, "loss": 0.2027, "step": 4749 }, { "epoch": 0.37918096910672944, "grad_norm": 0.27246198907625585, "learning_rate": 1.4258635079156391e-05, "loss": 0.1927, "step": 4750 }, { "epoch": 0.379260796679173, "grad_norm": 0.2986528035033151, "learning_rate": 1.4256295645012461e-05, "loss": 0.1476, "step": 4751 }, { "epoch": 0.3793406242516165, "grad_norm": 0.30086288508496895, "learning_rate": 1.4253955926352014e-05, "loss": 0.1818, "step": 4752 }, { "epoch": 0.37942045182406003, "grad_norm": 0.3645538881795623, "learning_rate": 1.425161592333145e-05, "loss": 0.1315, "step": 4753 }, { "epoch": 0.37950027939650355, "grad_norm": 0.3202161445443755, "learning_rate": 1.4249275636107182e-05, "loss": 0.1409, "step": 4754 }, { "epoch": 0.37958010696894706, "grad_norm": 0.30343021366371375, "learning_rate": 1.4246935064835659e-05, "loss": 0.1154, "step": 4755 }, { "epoch": 0.3796599345413906, "grad_norm": 0.32760654668890804, "learning_rate": 1.4244594209673327e-05, "loss": 0.1903, "step": 4756 }, { "epoch": 0.37973976211383415, "grad_norm": 0.2941457411253206, "learning_rate": 1.4242253070776678e-05, "loss": 0.1465, "step": 4757 }, { "epoch": 0.37981958968627766, "grad_norm": 0.3341401203908516, "learning_rate": 1.4239911648302193e-05, "loss": 0.1744, "step": 4758 }, { "epoch": 0.3798994172587212, "grad_norm": 0.32284667068975575, "learning_rate": 1.4237569942406397e-05, "loss": 0.2043, "step": 4759 }, { "epoch": 0.3799792448311647, "grad_norm": 0.3475169335016618, "learning_rate": 1.4235227953245816e-05, "loss": 0.1292, "step": 4760 }, { "epoch": 0.3800590724036082, "grad_norm": 0.27194787607792115, "learning_rate": 1.4232885680977013e-05, "loss": 0.1964, "step": 4761 }, { "epoch": 0.3801388999760517, "grad_norm": 0.3014293307093157, "learning_rate": 1.423054312575655e-05, "loss": 0.1605, "step": 4762 }, { "epoch": 0.38021872754849523, "grad_norm": 0.33661891313550274, "learning_rate": 1.4228200287741021e-05, "loss": 0.1415, "step": 4763 }, { "epoch": 0.3802985551209388, "grad_norm": 0.2976623321901965, "learning_rate": 1.4225857167087037e-05, "loss": 0.1628, "step": 4764 }, { "epoch": 0.3803783826933823, "grad_norm": 0.2797965179656636, "learning_rate": 1.4223513763951222e-05, "loss": 0.1894, "step": 4765 }, { "epoch": 0.3804582102658258, "grad_norm": 0.33125596744914015, "learning_rate": 1.422117007849023e-05, "loss": 0.1712, "step": 4766 }, { "epoch": 0.38053803783826934, "grad_norm": 0.31920375501655757, "learning_rate": 1.421882611086072e-05, "loss": 0.2103, "step": 4767 }, { "epoch": 0.38061786541071285, "grad_norm": 0.41749716636264667, "learning_rate": 1.4216481861219385e-05, "loss": 0.184, "step": 4768 }, { "epoch": 0.38069769298315637, "grad_norm": 0.312089630500175, "learning_rate": 1.4214137329722919e-05, "loss": 0.1912, "step": 4769 }, { "epoch": 0.3807775205555999, "grad_norm": 0.2894911229359483, "learning_rate": 1.4211792516528052e-05, "loss": 0.1648, "step": 4770 }, { "epoch": 0.38085734812804345, "grad_norm": 0.37850945985660245, "learning_rate": 1.420944742179152e-05, "loss": 0.2201, "step": 4771 }, { "epoch": 0.38093717570048696, "grad_norm": 0.39450339121570266, "learning_rate": 1.4207102045670089e-05, "loss": 0.2316, "step": 4772 }, { "epoch": 0.3810170032729305, "grad_norm": 0.3380932432866817, "learning_rate": 1.4204756388320535e-05, "loss": 0.1523, "step": 4773 }, { "epoch": 0.381096830845374, "grad_norm": 0.30469800464053276, "learning_rate": 1.4202410449899658e-05, "loss": 0.1862, "step": 4774 }, { "epoch": 0.3811766584178175, "grad_norm": 0.31651640710019946, "learning_rate": 1.420006423056427e-05, "loss": 0.1727, "step": 4775 }, { "epoch": 0.381256485990261, "grad_norm": 0.34042917309772786, "learning_rate": 1.4197717730471211e-05, "loss": 0.1581, "step": 4776 }, { "epoch": 0.38133631356270453, "grad_norm": 0.33591503269148826, "learning_rate": 1.4195370949777337e-05, "loss": 0.1765, "step": 4777 }, { "epoch": 0.3814161411351481, "grad_norm": 0.24949178229325475, "learning_rate": 1.4193023888639513e-05, "loss": 0.1682, "step": 4778 }, { "epoch": 0.3814959687075916, "grad_norm": 0.2928683907628006, "learning_rate": 1.4190676547214638e-05, "loss": 0.1796, "step": 4779 }, { "epoch": 0.38157579628003513, "grad_norm": 0.3338324812368564, "learning_rate": 1.4188328925659618e-05, "loss": 0.1303, "step": 4780 }, { "epoch": 0.38165562385247864, "grad_norm": 0.36226143745650796, "learning_rate": 1.4185981024131388e-05, "loss": 0.166, "step": 4781 }, { "epoch": 0.38173545142492216, "grad_norm": 0.32850724828751826, "learning_rate": 1.418363284278689e-05, "loss": 0.1621, "step": 4782 }, { "epoch": 0.38181527899736567, "grad_norm": 0.3190495659053493, "learning_rate": 1.4181284381783093e-05, "loss": 0.152, "step": 4783 }, { "epoch": 0.38189510656980924, "grad_norm": 0.320468122276126, "learning_rate": 1.4178935641276982e-05, "loss": 0.1972, "step": 4784 }, { "epoch": 0.38197493414225275, "grad_norm": 0.2887846454794376, "learning_rate": 1.4176586621425562e-05, "loss": 0.1962, "step": 4785 }, { "epoch": 0.38205476171469627, "grad_norm": 0.3735655112810368, "learning_rate": 1.4174237322385856e-05, "loss": 0.1561, "step": 4786 }, { "epoch": 0.3821345892871398, "grad_norm": 0.3092689139838858, "learning_rate": 1.41718877443149e-05, "loss": 0.1918, "step": 4787 }, { "epoch": 0.3822144168595833, "grad_norm": 0.3258529951509455, "learning_rate": 1.4169537887369762e-05, "loss": 0.1763, "step": 4788 }, { "epoch": 0.3822942444320268, "grad_norm": 0.3502180599856141, "learning_rate": 1.4167187751707514e-05, "loss": 0.1497, "step": 4789 }, { "epoch": 0.3823740720044703, "grad_norm": 0.30134635812495636, "learning_rate": 1.416483733748526e-05, "loss": 0.146, "step": 4790 }, { "epoch": 0.3824538995769139, "grad_norm": 0.38873777649649643, "learning_rate": 1.416248664486011e-05, "loss": 0.1769, "step": 4791 }, { "epoch": 0.3825337271493574, "grad_norm": 0.3054727688410385, "learning_rate": 1.4160135673989201e-05, "loss": 0.1742, "step": 4792 }, { "epoch": 0.3826135547218009, "grad_norm": 0.3013288173658207, "learning_rate": 1.4157784425029684e-05, "loss": 0.1851, "step": 4793 }, { "epoch": 0.38269338229424443, "grad_norm": 0.32396517588680906, "learning_rate": 1.4155432898138735e-05, "loss": 0.1984, "step": 4794 }, { "epoch": 0.38277320986668795, "grad_norm": 0.3158693434032797, "learning_rate": 1.4153081093473538e-05, "loss": 0.2027, "step": 4795 }, { "epoch": 0.38285303743913146, "grad_norm": 0.28713922929678287, "learning_rate": 1.4150729011191309e-05, "loss": 0.1517, "step": 4796 }, { "epoch": 0.382932865011575, "grad_norm": 0.2767976570119402, "learning_rate": 1.4148376651449266e-05, "loss": 0.1925, "step": 4797 }, { "epoch": 0.38301269258401854, "grad_norm": 0.31149854887167816, "learning_rate": 1.4146024014404666e-05, "loss": 0.2068, "step": 4798 }, { "epoch": 0.38309252015646206, "grad_norm": 0.27109087436578405, "learning_rate": 1.4143671100214769e-05, "loss": 0.1839, "step": 4799 }, { "epoch": 0.38317234772890557, "grad_norm": 0.260107116003808, "learning_rate": 1.4141317909036853e-05, "loss": 0.2053, "step": 4800 }, { "epoch": 0.3832521753013491, "grad_norm": 0.30780647611078854, "learning_rate": 1.4138964441028229e-05, "loss": 0.1806, "step": 4801 }, { "epoch": 0.3833320028737926, "grad_norm": 0.28623975741014734, "learning_rate": 1.4136610696346206e-05, "loss": 0.1812, "step": 4802 }, { "epoch": 0.3834118304462361, "grad_norm": 0.3132297738113716, "learning_rate": 1.4134256675148132e-05, "loss": 0.1443, "step": 4803 }, { "epoch": 0.3834916580186796, "grad_norm": 0.2983851885736144, "learning_rate": 1.4131902377591358e-05, "loss": 0.1847, "step": 4804 }, { "epoch": 0.3835714855911232, "grad_norm": 0.307648277154295, "learning_rate": 1.4129547803833263e-05, "loss": 0.2533, "step": 4805 }, { "epoch": 0.3836513131635667, "grad_norm": 0.26282636938222087, "learning_rate": 1.412719295403124e-05, "loss": 0.1764, "step": 4806 }, { "epoch": 0.3837311407360102, "grad_norm": 0.26133144407414716, "learning_rate": 1.41248378283427e-05, "loss": 0.2136, "step": 4807 }, { "epoch": 0.38381096830845374, "grad_norm": 0.3201254734760278, "learning_rate": 1.4122482426925076e-05, "loss": 0.2226, "step": 4808 }, { "epoch": 0.38389079588089725, "grad_norm": 0.322271156614744, "learning_rate": 1.4120126749935814e-05, "loss": 0.1943, "step": 4809 }, { "epoch": 0.38397062345334076, "grad_norm": 0.29805319572504624, "learning_rate": 1.4117770797532386e-05, "loss": 0.1743, "step": 4810 }, { "epoch": 0.3840504510257843, "grad_norm": 0.277468879607658, "learning_rate": 1.4115414569872276e-05, "loss": 0.147, "step": 4811 }, { "epoch": 0.38413027859822785, "grad_norm": 0.3100325180382198, "learning_rate": 1.4113058067112987e-05, "loss": 0.1578, "step": 4812 }, { "epoch": 0.38421010617067136, "grad_norm": 0.35591950202573236, "learning_rate": 1.4110701289412045e-05, "loss": 0.1924, "step": 4813 }, { "epoch": 0.3842899337431149, "grad_norm": 0.3151402760405371, "learning_rate": 1.4108344236926991e-05, "loss": 0.1858, "step": 4814 }, { "epoch": 0.3843697613155584, "grad_norm": 0.2826892879596837, "learning_rate": 1.4105986909815382e-05, "loss": 0.1942, "step": 4815 }, { "epoch": 0.3844495888880019, "grad_norm": 0.38480552388868083, "learning_rate": 1.4103629308234794e-05, "loss": 0.2226, "step": 4816 }, { "epoch": 0.3845294164604454, "grad_norm": 0.33607078696714476, "learning_rate": 1.4101271432342829e-05, "loss": 0.1492, "step": 4817 }, { "epoch": 0.384609244032889, "grad_norm": 0.382864527687929, "learning_rate": 1.40989132822971e-05, "loss": 0.2263, "step": 4818 }, { "epoch": 0.3846890716053325, "grad_norm": 0.30156416513086903, "learning_rate": 1.4096554858255238e-05, "loss": 0.1925, "step": 4819 }, { "epoch": 0.384768899177776, "grad_norm": 0.2670124155825131, "learning_rate": 1.4094196160374896e-05, "loss": 0.1927, "step": 4820 }, { "epoch": 0.3848487267502195, "grad_norm": 0.33062856331620255, "learning_rate": 1.4091837188813741e-05, "loss": 0.1765, "step": 4821 }, { "epoch": 0.38492855432266304, "grad_norm": 0.3546057669650701, "learning_rate": 1.4089477943729466e-05, "loss": 0.2205, "step": 4822 }, { "epoch": 0.38500838189510656, "grad_norm": 0.307563536063231, "learning_rate": 1.4087118425279773e-05, "loss": 0.1685, "step": 4823 }, { "epoch": 0.38508820946755007, "grad_norm": 0.3113248926408518, "learning_rate": 1.408475863362239e-05, "loss": 0.2027, "step": 4824 }, { "epoch": 0.38516803703999364, "grad_norm": 0.38355511406689696, "learning_rate": 1.4082398568915055e-05, "loss": 0.145, "step": 4825 }, { "epoch": 0.38524786461243715, "grad_norm": 0.332960129877706, "learning_rate": 1.408003823131553e-05, "loss": 0.1923, "step": 4826 }, { "epoch": 0.38532769218488067, "grad_norm": 0.25805988633572324, "learning_rate": 1.4077677620981598e-05, "loss": 0.1277, "step": 4827 }, { "epoch": 0.3854075197573242, "grad_norm": 0.2670960579738852, "learning_rate": 1.4075316738071053e-05, "loss": 0.1733, "step": 4828 }, { "epoch": 0.3854873473297677, "grad_norm": 0.3266643644465381, "learning_rate": 1.4072955582741713e-05, "loss": 0.1579, "step": 4829 }, { "epoch": 0.3855671749022112, "grad_norm": 0.3498853677590482, "learning_rate": 1.4070594155151409e-05, "loss": 0.1514, "step": 4830 }, { "epoch": 0.3856470024746547, "grad_norm": 0.3302514864122319, "learning_rate": 1.4068232455457996e-05, "loss": 0.165, "step": 4831 }, { "epoch": 0.3857268300470983, "grad_norm": 0.2640245402165534, "learning_rate": 1.4065870483819344e-05, "loss": 0.1829, "step": 4832 }, { "epoch": 0.3858066576195418, "grad_norm": 0.2745860953128178, "learning_rate": 1.4063508240393338e-05, "loss": 0.1543, "step": 4833 }, { "epoch": 0.3858864851919853, "grad_norm": 0.33855374912575825, "learning_rate": 1.406114572533789e-05, "loss": 0.1814, "step": 4834 }, { "epoch": 0.38596631276442883, "grad_norm": 0.3114609912942701, "learning_rate": 1.405878293881092e-05, "loss": 0.2027, "step": 4835 }, { "epoch": 0.38604614033687235, "grad_norm": 0.3589067557227744, "learning_rate": 1.4056419880970376e-05, "loss": 0.149, "step": 4836 }, { "epoch": 0.38612596790931586, "grad_norm": 0.3152542807561235, "learning_rate": 1.4054056551974212e-05, "loss": 0.1955, "step": 4837 }, { "epoch": 0.3862057954817594, "grad_norm": 0.36582479166394943, "learning_rate": 1.4051692951980414e-05, "loss": 0.2092, "step": 4838 }, { "epoch": 0.38628562305420294, "grad_norm": 0.42953717712502526, "learning_rate": 1.4049329081146971e-05, "loss": 0.1675, "step": 4839 }, { "epoch": 0.38636545062664646, "grad_norm": 0.3057215500547268, "learning_rate": 1.4046964939631911e-05, "loss": 0.1512, "step": 4840 }, { "epoch": 0.38644527819908997, "grad_norm": 0.29683044059171654, "learning_rate": 1.4044600527593254e-05, "loss": 0.1673, "step": 4841 }, { "epoch": 0.3865251057715335, "grad_norm": 0.30173997618691917, "learning_rate": 1.4042235845189063e-05, "loss": 0.1517, "step": 4842 }, { "epoch": 0.386604933343977, "grad_norm": 0.3272499229921231, "learning_rate": 1.4039870892577401e-05, "loss": 0.1966, "step": 4843 }, { "epoch": 0.3866847609164205, "grad_norm": 0.3136473910857798, "learning_rate": 1.4037505669916357e-05, "loss": 0.1724, "step": 4844 }, { "epoch": 0.3867645884888641, "grad_norm": 0.3512975031306751, "learning_rate": 1.4035140177364037e-05, "loss": 0.1593, "step": 4845 }, { "epoch": 0.3868444160613076, "grad_norm": 0.33721780572785864, "learning_rate": 1.4032774415078563e-05, "loss": 0.244, "step": 4846 }, { "epoch": 0.3869242436337511, "grad_norm": 0.3357324168426275, "learning_rate": 1.4030408383218082e-05, "loss": 0.1953, "step": 4847 }, { "epoch": 0.3870040712061946, "grad_norm": 0.3251531484773841, "learning_rate": 1.4028042081940748e-05, "loss": 0.1837, "step": 4848 }, { "epoch": 0.38708389877863814, "grad_norm": 0.40080347262867067, "learning_rate": 1.4025675511404745e-05, "loss": 0.2169, "step": 4849 }, { "epoch": 0.38716372635108165, "grad_norm": 0.3570465872230252, "learning_rate": 1.4023308671768261e-05, "loss": 0.2094, "step": 4850 }, { "epoch": 0.38724355392352516, "grad_norm": 0.33535233433705, "learning_rate": 1.4020941563189517e-05, "loss": 0.1649, "step": 4851 }, { "epoch": 0.38732338149596873, "grad_norm": 0.28014776115975376, "learning_rate": 1.401857418582674e-05, "loss": 0.1595, "step": 4852 }, { "epoch": 0.38740320906841225, "grad_norm": 0.29860196656964477, "learning_rate": 1.4016206539838185e-05, "loss": 0.1516, "step": 4853 }, { "epoch": 0.38748303664085576, "grad_norm": 0.3409896938695548, "learning_rate": 1.4013838625382115e-05, "loss": 0.1644, "step": 4854 }, { "epoch": 0.3875628642132993, "grad_norm": 0.30792061709705637, "learning_rate": 1.4011470442616815e-05, "loss": 0.1864, "step": 4855 }, { "epoch": 0.3876426917857428, "grad_norm": 0.27560338290438535, "learning_rate": 1.4009101991700593e-05, "loss": 0.2157, "step": 4856 }, { "epoch": 0.3877225193581863, "grad_norm": 0.27998302198729913, "learning_rate": 1.4006733272791768e-05, "loss": 0.1417, "step": 4857 }, { "epoch": 0.3878023469306298, "grad_norm": 0.32073225667070493, "learning_rate": 1.4004364286048684e-05, "loss": 0.1605, "step": 4858 }, { "epoch": 0.3878821745030734, "grad_norm": 0.3582146746471316, "learning_rate": 1.400199503162969e-05, "loss": 0.1847, "step": 4859 }, { "epoch": 0.3879620020755169, "grad_norm": 0.35291289054450997, "learning_rate": 1.399962550969317e-05, "loss": 0.1923, "step": 4860 }, { "epoch": 0.3880418296479604, "grad_norm": 0.31271672784165555, "learning_rate": 1.3997255720397508e-05, "loss": 0.204, "step": 4861 }, { "epoch": 0.3881216572204039, "grad_norm": 0.3123471725207112, "learning_rate": 1.3994885663901122e-05, "loss": 0.1941, "step": 4862 }, { "epoch": 0.38820148479284744, "grad_norm": 0.3458066165599775, "learning_rate": 1.3992515340362436e-05, "loss": 0.1669, "step": 4863 }, { "epoch": 0.38828131236529095, "grad_norm": 0.28132872691498045, "learning_rate": 1.3990144749939902e-05, "loss": 0.1875, "step": 4864 }, { "epoch": 0.38836113993773447, "grad_norm": 0.30106451046702243, "learning_rate": 1.3987773892791984e-05, "loss": 0.1356, "step": 4865 }, { "epoch": 0.38844096751017804, "grad_norm": 0.32404059215087494, "learning_rate": 1.3985402769077161e-05, "loss": 0.1528, "step": 4866 }, { "epoch": 0.38852079508262155, "grad_norm": 0.3456050822486683, "learning_rate": 1.3983031378953934e-05, "loss": 0.1991, "step": 4867 }, { "epoch": 0.38860062265506506, "grad_norm": 0.3501310210926657, "learning_rate": 1.3980659722580823e-05, "loss": 0.1557, "step": 4868 }, { "epoch": 0.3886804502275086, "grad_norm": 0.293985320628895, "learning_rate": 1.3978287800116364e-05, "loss": 0.1729, "step": 4869 }, { "epoch": 0.3887602777999521, "grad_norm": 0.3387046048868448, "learning_rate": 1.3975915611719104e-05, "loss": 0.1432, "step": 4870 }, { "epoch": 0.3888401053723956, "grad_norm": 0.33874963117599227, "learning_rate": 1.3973543157547624e-05, "loss": 0.2009, "step": 4871 }, { "epoch": 0.3889199329448392, "grad_norm": 0.3627154973582105, "learning_rate": 1.3971170437760509e-05, "loss": 0.1472, "step": 4872 }, { "epoch": 0.3889997605172827, "grad_norm": 0.36688665153077743, "learning_rate": 1.3968797452516365e-05, "loss": 0.1495, "step": 4873 }, { "epoch": 0.3890795880897262, "grad_norm": 0.3300321954968815, "learning_rate": 1.3966424201973818e-05, "loss": 0.1841, "step": 4874 }, { "epoch": 0.3891594156621697, "grad_norm": 0.3426009516522819, "learning_rate": 1.3964050686291509e-05, "loss": 0.1919, "step": 4875 }, { "epoch": 0.38923924323461323, "grad_norm": 0.30987765096599756, "learning_rate": 1.39616769056281e-05, "loss": 0.1999, "step": 4876 }, { "epoch": 0.38931907080705674, "grad_norm": 0.2917332825751619, "learning_rate": 1.3959302860142265e-05, "loss": 0.232, "step": 4877 }, { "epoch": 0.38939889837950026, "grad_norm": 0.29554203436360904, "learning_rate": 1.3956928549992706e-05, "loss": 0.1584, "step": 4878 }, { "epoch": 0.3894787259519438, "grad_norm": 0.2972772296579239, "learning_rate": 1.3954553975338132e-05, "loss": 0.1874, "step": 4879 }, { "epoch": 0.38955855352438734, "grad_norm": 0.3087600725797432, "learning_rate": 1.3952179136337276e-05, "loss": 0.1996, "step": 4880 }, { "epoch": 0.38963838109683085, "grad_norm": 0.3198939822900544, "learning_rate": 1.394980403314888e-05, "loss": 0.1809, "step": 4881 }, { "epoch": 0.38971820866927437, "grad_norm": 0.30882706749528777, "learning_rate": 1.394742866593172e-05, "loss": 0.1615, "step": 4882 }, { "epoch": 0.3897980362417179, "grad_norm": 0.36827617414182784, "learning_rate": 1.3945053034844573e-05, "loss": 0.1796, "step": 4883 }, { "epoch": 0.3898778638141614, "grad_norm": 0.3404558303755041, "learning_rate": 1.3942677140046245e-05, "loss": 0.2093, "step": 4884 }, { "epoch": 0.3899576913866049, "grad_norm": 0.32541964425687875, "learning_rate": 1.394030098169555e-05, "loss": 0.1643, "step": 4885 }, { "epoch": 0.3900375189590485, "grad_norm": 0.2747448586494841, "learning_rate": 1.3937924559951333e-05, "loss": 0.1754, "step": 4886 }, { "epoch": 0.390117346531492, "grad_norm": 0.2897861058928205, "learning_rate": 1.393554787497244e-05, "loss": 0.1561, "step": 4887 }, { "epoch": 0.3901971741039355, "grad_norm": 0.31293767347666607, "learning_rate": 1.3933170926917745e-05, "loss": 0.227, "step": 4888 }, { "epoch": 0.390277001676379, "grad_norm": 0.2692497024097019, "learning_rate": 1.393079371594614e-05, "loss": 0.1535, "step": 4889 }, { "epoch": 0.39035682924882253, "grad_norm": 0.3226333619687945, "learning_rate": 1.392841624221653e-05, "loss": 0.2361, "step": 4890 }, { "epoch": 0.39043665682126605, "grad_norm": 0.3597828443301343, "learning_rate": 1.3926038505887842e-05, "loss": 0.1554, "step": 4891 }, { "epoch": 0.39051648439370956, "grad_norm": 0.32361302530353775, "learning_rate": 1.3923660507119014e-05, "loss": 0.2142, "step": 4892 }, { "epoch": 0.39059631196615313, "grad_norm": 0.33014472701440334, "learning_rate": 1.3921282246069013e-05, "loss": 0.1631, "step": 4893 }, { "epoch": 0.39067613953859665, "grad_norm": 0.33914580628051105, "learning_rate": 1.3918903722896808e-05, "loss": 0.1853, "step": 4894 }, { "epoch": 0.39075596711104016, "grad_norm": 0.2937749971114057, "learning_rate": 1.3916524937761403e-05, "loss": 0.1651, "step": 4895 }, { "epoch": 0.3908357946834837, "grad_norm": 0.2748118094183606, "learning_rate": 1.3914145890821802e-05, "loss": 0.2395, "step": 4896 }, { "epoch": 0.3909156222559272, "grad_norm": 0.31264372560127296, "learning_rate": 1.3911766582237036e-05, "loss": 0.1688, "step": 4897 }, { "epoch": 0.3909954498283707, "grad_norm": 0.2928421641700058, "learning_rate": 1.3909387012166158e-05, "loss": 0.1589, "step": 4898 }, { "epoch": 0.3910752774008142, "grad_norm": 0.30926846989884565, "learning_rate": 1.3907007180768227e-05, "loss": 0.2156, "step": 4899 }, { "epoch": 0.3911551049732578, "grad_norm": 0.28200443100752953, "learning_rate": 1.390462708820233e-05, "loss": 0.1346, "step": 4900 }, { "epoch": 0.3912349325457013, "grad_norm": 0.27984686634525796, "learning_rate": 1.390224673462756e-05, "loss": 0.2039, "step": 4901 }, { "epoch": 0.3913147601181448, "grad_norm": 0.2615778892799061, "learning_rate": 1.3899866120203044e-05, "loss": 0.1904, "step": 4902 }, { "epoch": 0.3913945876905883, "grad_norm": 0.3302607651556018, "learning_rate": 1.389748524508791e-05, "loss": 0.2164, "step": 4903 }, { "epoch": 0.39147441526303184, "grad_norm": 0.3238931973804314, "learning_rate": 1.3895104109441312e-05, "loss": 0.2213, "step": 4904 }, { "epoch": 0.39155424283547535, "grad_norm": 0.332565514534471, "learning_rate": 1.3892722713422412e-05, "loss": 0.215, "step": 4905 }, { "epoch": 0.3916340704079189, "grad_norm": 0.2809153932612141, "learning_rate": 1.3890341057190413e-05, "loss": 0.1835, "step": 4906 }, { "epoch": 0.39171389798036244, "grad_norm": 0.2887348324642031, "learning_rate": 1.3887959140904503e-05, "loss": 0.1193, "step": 4907 }, { "epoch": 0.39179372555280595, "grad_norm": 0.27839979417894173, "learning_rate": 1.3885576964723915e-05, "loss": 0.2288, "step": 4908 }, { "epoch": 0.39187355312524946, "grad_norm": 0.2639907478733623, "learning_rate": 1.3883194528807881e-05, "loss": 0.2051, "step": 4909 }, { "epoch": 0.391953380697693, "grad_norm": 0.28901398209875123, "learning_rate": 1.3880811833315663e-05, "loss": 0.201, "step": 4910 }, { "epoch": 0.3920332082701365, "grad_norm": 0.25924510472204826, "learning_rate": 1.3878428878406532e-05, "loss": 0.1964, "step": 4911 }, { "epoch": 0.39211303584258, "grad_norm": 0.3033717364017947, "learning_rate": 1.3876045664239777e-05, "loss": 0.2236, "step": 4912 }, { "epoch": 0.3921928634150236, "grad_norm": 0.2916388396320962, "learning_rate": 1.387366219097471e-05, "loss": 0.2105, "step": 4913 }, { "epoch": 0.3922726909874671, "grad_norm": 0.314602964399113, "learning_rate": 1.3871278458770651e-05, "loss": 0.1716, "step": 4914 }, { "epoch": 0.3923525185599106, "grad_norm": 0.2620089280744756, "learning_rate": 1.3868894467786953e-05, "loss": 0.1639, "step": 4915 }, { "epoch": 0.3924323461323541, "grad_norm": 0.2821982755899837, "learning_rate": 1.3866510218182967e-05, "loss": 0.1648, "step": 4916 }, { "epoch": 0.39251217370479763, "grad_norm": 0.28973586597363626, "learning_rate": 1.386412571011808e-05, "loss": 0.1994, "step": 4917 }, { "epoch": 0.39259200127724114, "grad_norm": 0.32926612198670285, "learning_rate": 1.3861740943751673e-05, "loss": 0.173, "step": 4918 }, { "epoch": 0.39267182884968466, "grad_norm": 0.28755173015141877, "learning_rate": 1.3859355919243172e-05, "loss": 0.1811, "step": 4919 }, { "epoch": 0.3927516564221282, "grad_norm": 0.3525990511129529, "learning_rate": 1.3856970636751998e-05, "loss": 0.221, "step": 4920 }, { "epoch": 0.39283148399457174, "grad_norm": 0.3103737318099628, "learning_rate": 1.38545850964376e-05, "loss": 0.2183, "step": 4921 }, { "epoch": 0.39291131156701525, "grad_norm": 0.28601922030687366, "learning_rate": 1.3852199298459448e-05, "loss": 0.154, "step": 4922 }, { "epoch": 0.39299113913945877, "grad_norm": 0.353074108969725, "learning_rate": 1.3849813242977013e-05, "loss": 0.1821, "step": 4923 }, { "epoch": 0.3930709667119023, "grad_norm": 0.28753310209859206, "learning_rate": 1.3847426930149798e-05, "loss": 0.1563, "step": 4924 }, { "epoch": 0.3931507942843458, "grad_norm": 0.34573626104669636, "learning_rate": 1.3845040360137323e-05, "loss": 0.1697, "step": 4925 }, { "epoch": 0.3932306218567893, "grad_norm": 0.2918850498928334, "learning_rate": 1.3842653533099113e-05, "loss": 0.174, "step": 4926 }, { "epoch": 0.3933104494292329, "grad_norm": 0.37235839083041145, "learning_rate": 1.3840266449194722e-05, "loss": 0.1497, "step": 4927 }, { "epoch": 0.3933902770016764, "grad_norm": 0.343700926626467, "learning_rate": 1.3837879108583714e-05, "loss": 0.1593, "step": 4928 }, { "epoch": 0.3934701045741199, "grad_norm": 0.35396635077195465, "learning_rate": 1.3835491511425678e-05, "loss": 0.1769, "step": 4929 }, { "epoch": 0.3935499321465634, "grad_norm": 0.2844253734016276, "learning_rate": 1.3833103657880214e-05, "loss": 0.1758, "step": 4930 }, { "epoch": 0.39362975971900693, "grad_norm": 0.32701878526859496, "learning_rate": 1.3830715548106937e-05, "loss": 0.1573, "step": 4931 }, { "epoch": 0.39370958729145045, "grad_norm": 0.41555731418833625, "learning_rate": 1.3828327182265486e-05, "loss": 0.1988, "step": 4932 }, { "epoch": 0.393789414863894, "grad_norm": 0.3852463889399044, "learning_rate": 1.3825938560515515e-05, "loss": 0.1873, "step": 4933 }, { "epoch": 0.39386924243633753, "grad_norm": 0.3172332740998887, "learning_rate": 1.382354968301669e-05, "loss": 0.2052, "step": 4934 }, { "epoch": 0.39394907000878104, "grad_norm": 0.36246404407767574, "learning_rate": 1.3821160549928702e-05, "loss": 0.166, "step": 4935 }, { "epoch": 0.39402889758122456, "grad_norm": 0.2861360090151833, "learning_rate": 1.381877116141125e-05, "loss": 0.1954, "step": 4936 }, { "epoch": 0.39410872515366807, "grad_norm": 0.38093715556813246, "learning_rate": 1.3816381517624062e-05, "loss": 0.1908, "step": 4937 }, { "epoch": 0.3941885527261116, "grad_norm": 0.3432032272272916, "learning_rate": 1.3813991618726872e-05, "loss": 0.2025, "step": 4938 }, { "epoch": 0.3942683802985551, "grad_norm": 0.2918167073037589, "learning_rate": 1.3811601464879436e-05, "loss": 0.182, "step": 4939 }, { "epoch": 0.39434820787099867, "grad_norm": 0.3114613452056297, "learning_rate": 1.3809211056241526e-05, "loss": 0.2222, "step": 4940 }, { "epoch": 0.3944280354434422, "grad_norm": 0.34118322067960116, "learning_rate": 1.380682039297293e-05, "loss": 0.1839, "step": 4941 }, { "epoch": 0.3945078630158857, "grad_norm": 0.3499117229767841, "learning_rate": 1.3804429475233456e-05, "loss": 0.1508, "step": 4942 }, { "epoch": 0.3945876905883292, "grad_norm": 0.2844468716291081, "learning_rate": 1.3802038303182928e-05, "loss": 0.2327, "step": 4943 }, { "epoch": 0.3946675181607727, "grad_norm": 0.35190745399949763, "learning_rate": 1.3799646876981185e-05, "loss": 0.1624, "step": 4944 }, { "epoch": 0.39474734573321624, "grad_norm": 0.278512807801064, "learning_rate": 1.3797255196788086e-05, "loss": 0.2087, "step": 4945 }, { "epoch": 0.39482717330565975, "grad_norm": 0.29474126208316076, "learning_rate": 1.3794863262763504e-05, "loss": 0.2271, "step": 4946 }, { "epoch": 0.3949070008781033, "grad_norm": 0.35532184406559286, "learning_rate": 1.3792471075067332e-05, "loss": 0.1888, "step": 4947 }, { "epoch": 0.39498682845054683, "grad_norm": 0.30729341731846843, "learning_rate": 1.3790078633859478e-05, "loss": 0.211, "step": 4948 }, { "epoch": 0.39506665602299035, "grad_norm": 0.338071336089844, "learning_rate": 1.3787685939299862e-05, "loss": 0.1806, "step": 4949 }, { "epoch": 0.39514648359543386, "grad_norm": 0.3290481541689258, "learning_rate": 1.3785292991548437e-05, "loss": 0.1851, "step": 4950 }, { "epoch": 0.3952263111678774, "grad_norm": 0.29728463631795005, "learning_rate": 1.3782899790765147e-05, "loss": 0.1814, "step": 4951 }, { "epoch": 0.3953061387403209, "grad_norm": 0.2606860411358087, "learning_rate": 1.3780506337109983e-05, "loss": 0.1872, "step": 4952 }, { "epoch": 0.3953859663127644, "grad_norm": 0.28576668464429356, "learning_rate": 1.3778112630742927e-05, "loss": 0.2133, "step": 4953 }, { "epoch": 0.395465793885208, "grad_norm": 0.31658873583740527, "learning_rate": 1.3775718671823996e-05, "loss": 0.2145, "step": 4954 }, { "epoch": 0.3955456214576515, "grad_norm": 0.3468612536030628, "learning_rate": 1.3773324460513216e-05, "loss": 0.1554, "step": 4955 }, { "epoch": 0.395625449030095, "grad_norm": 0.35415658121289584, "learning_rate": 1.3770929996970623e-05, "loss": 0.1483, "step": 4956 }, { "epoch": 0.3957052766025385, "grad_norm": 0.28659016258615905, "learning_rate": 1.3768535281356287e-05, "loss": 0.1466, "step": 4957 }, { "epoch": 0.395785104174982, "grad_norm": 0.2904730405923112, "learning_rate": 1.3766140313830279e-05, "loss": 0.1692, "step": 4958 }, { "epoch": 0.39586493174742554, "grad_norm": 0.34400852129108134, "learning_rate": 1.3763745094552697e-05, "loss": 0.2078, "step": 4959 }, { "epoch": 0.39594475931986906, "grad_norm": 0.34916584309319315, "learning_rate": 1.3761349623683648e-05, "loss": 0.1773, "step": 4960 }, { "epoch": 0.3960245868923126, "grad_norm": 0.2658485740950093, "learning_rate": 1.3758953901383265e-05, "loss": 0.1483, "step": 4961 }, { "epoch": 0.39610441446475614, "grad_norm": 0.26551239627204537, "learning_rate": 1.3756557927811686e-05, "loss": 0.1598, "step": 4962 }, { "epoch": 0.39618424203719965, "grad_norm": 0.266805001523364, "learning_rate": 1.375416170312908e-05, "loss": 0.1909, "step": 4963 }, { "epoch": 0.39626406960964317, "grad_norm": 0.2818579137306798, "learning_rate": 1.3751765227495616e-05, "loss": 0.1736, "step": 4964 }, { "epoch": 0.3963438971820867, "grad_norm": 0.27379846136977054, "learning_rate": 1.3749368501071496e-05, "loss": 0.1737, "step": 4965 }, { "epoch": 0.3964237247545302, "grad_norm": 0.32761907267371504, "learning_rate": 1.3746971524016929e-05, "loss": 0.1639, "step": 4966 }, { "epoch": 0.39650355232697376, "grad_norm": 0.4152479684466086, "learning_rate": 1.3744574296492146e-05, "loss": 0.1826, "step": 4967 }, { "epoch": 0.3965833798994173, "grad_norm": 0.27821585520628356, "learning_rate": 1.3742176818657389e-05, "loss": 0.2135, "step": 4968 }, { "epoch": 0.3966632074718608, "grad_norm": 0.29029326976162556, "learning_rate": 1.3739779090672923e-05, "loss": 0.1706, "step": 4969 }, { "epoch": 0.3967430350443043, "grad_norm": 0.2962587692693444, "learning_rate": 1.3737381112699022e-05, "loss": 0.2177, "step": 4970 }, { "epoch": 0.3968228626167478, "grad_norm": 0.2814423319271824, "learning_rate": 1.3734982884895981e-05, "loss": 0.2073, "step": 4971 }, { "epoch": 0.39690269018919133, "grad_norm": 0.3167780485755635, "learning_rate": 1.3732584407424122e-05, "loss": 0.1589, "step": 4972 }, { "epoch": 0.39698251776163485, "grad_norm": 0.31132680898538095, "learning_rate": 1.3730185680443762e-05, "loss": 0.1876, "step": 4973 }, { "epoch": 0.3970623453340784, "grad_norm": 0.28224906474585837, "learning_rate": 1.3727786704115254e-05, "loss": 0.1808, "step": 4974 }, { "epoch": 0.39714217290652193, "grad_norm": 0.26002629168146074, "learning_rate": 1.3725387478598955e-05, "loss": 0.2308, "step": 4975 }, { "epoch": 0.39722200047896544, "grad_norm": 0.29821180756374494, "learning_rate": 1.3722988004055246e-05, "loss": 0.1404, "step": 4976 }, { "epoch": 0.39730182805140896, "grad_norm": 0.2946050690501014, "learning_rate": 1.3720588280644524e-05, "loss": 0.205, "step": 4977 }, { "epoch": 0.39738165562385247, "grad_norm": 0.295844775788747, "learning_rate": 1.3718188308527198e-05, "loss": 0.1907, "step": 4978 }, { "epoch": 0.397461483196296, "grad_norm": 0.29241425572872154, "learning_rate": 1.3715788087863699e-05, "loss": 0.1572, "step": 4979 }, { "epoch": 0.3975413107687395, "grad_norm": 0.36612587913015815, "learning_rate": 1.3713387618814471e-05, "loss": 0.1321, "step": 4980 }, { "epoch": 0.39762113834118307, "grad_norm": 0.38069165221326573, "learning_rate": 1.3710986901539976e-05, "loss": 0.219, "step": 4981 }, { "epoch": 0.3977009659136266, "grad_norm": 0.24702814247113514, "learning_rate": 1.3708585936200692e-05, "loss": 0.1742, "step": 4982 }, { "epoch": 0.3977807934860701, "grad_norm": 0.2636956027319268, "learning_rate": 1.3706184722957117e-05, "loss": 0.1117, "step": 4983 }, { "epoch": 0.3978606210585136, "grad_norm": 0.3220456539018014, "learning_rate": 1.3703783261969758e-05, "loss": 0.2301, "step": 4984 }, { "epoch": 0.3979404486309571, "grad_norm": 0.27824462771666236, "learning_rate": 1.3701381553399147e-05, "loss": 0.1842, "step": 4985 }, { "epoch": 0.39802027620340064, "grad_norm": 0.2988517647920662, "learning_rate": 1.3698979597405824e-05, "loss": 0.1449, "step": 4986 }, { "epoch": 0.39810010377584415, "grad_norm": 0.2849352693755068, "learning_rate": 1.3696577394150357e-05, "loss": 0.1768, "step": 4987 }, { "epoch": 0.3981799313482877, "grad_norm": 0.26221185786973206, "learning_rate": 1.3694174943793316e-05, "loss": 0.1504, "step": 4988 }, { "epoch": 0.39825975892073123, "grad_norm": 0.3071667257091414, "learning_rate": 1.36917722464953e-05, "loss": 0.167, "step": 4989 }, { "epoch": 0.39833958649317475, "grad_norm": 0.33552872640685394, "learning_rate": 1.3689369302416924e-05, "loss": 0.1466, "step": 4990 }, { "epoch": 0.39841941406561826, "grad_norm": 0.2899736449821979, "learning_rate": 1.3686966111718804e-05, "loss": 0.1674, "step": 4991 }, { "epoch": 0.3984992416380618, "grad_norm": 0.3290221417944275, "learning_rate": 1.3684562674561597e-05, "loss": 0.2147, "step": 4992 }, { "epoch": 0.3985790692105053, "grad_norm": 0.31025878826234093, "learning_rate": 1.3682158991105951e-05, "loss": 0.1495, "step": 4993 }, { "epoch": 0.39865889678294886, "grad_norm": 0.3101150186150106, "learning_rate": 1.3679755061512553e-05, "loss": 0.1604, "step": 4994 }, { "epoch": 0.39873872435539237, "grad_norm": 0.3131874437954564, "learning_rate": 1.3677350885942089e-05, "loss": 0.1648, "step": 4995 }, { "epoch": 0.3988185519278359, "grad_norm": 0.35040435265033915, "learning_rate": 1.3674946464555272e-05, "loss": 0.1887, "step": 4996 }, { "epoch": 0.3988983795002794, "grad_norm": 0.30163831577235056, "learning_rate": 1.3672541797512823e-05, "loss": 0.179, "step": 4997 }, { "epoch": 0.3989782070727229, "grad_norm": 0.3026368419402577, "learning_rate": 1.3670136884975493e-05, "loss": 0.2127, "step": 4998 }, { "epoch": 0.3990580346451664, "grad_norm": 0.3197737893855429, "learning_rate": 1.3667731727104037e-05, "loss": 0.1536, "step": 4999 }, { "epoch": 0.39913786221760994, "grad_norm": 0.3691753462395137, "learning_rate": 1.3665326324059228e-05, "loss": 0.1678, "step": 5000 }, { "epoch": 0.3992176897900535, "grad_norm": 0.34037551065414695, "learning_rate": 1.3662920676001861e-05, "loss": 0.1934, "step": 5001 }, { "epoch": 0.399297517362497, "grad_norm": 0.31040917938858287, "learning_rate": 1.366051478309274e-05, "loss": 0.1884, "step": 5002 }, { "epoch": 0.39937734493494054, "grad_norm": 0.31141221170747185, "learning_rate": 1.3658108645492693e-05, "loss": 0.1586, "step": 5003 }, { "epoch": 0.39945717250738405, "grad_norm": 0.2933694953077802, "learning_rate": 1.365570226336256e-05, "loss": 0.1511, "step": 5004 }, { "epoch": 0.39953700007982756, "grad_norm": 0.3485098059129591, "learning_rate": 1.36532956368632e-05, "loss": 0.2235, "step": 5005 }, { "epoch": 0.3996168276522711, "grad_norm": 0.3002640459923241, "learning_rate": 1.3650888766155479e-05, "loss": 0.1683, "step": 5006 }, { "epoch": 0.3996966552247146, "grad_norm": 0.31631439890400276, "learning_rate": 1.3648481651400298e-05, "loss": 0.1439, "step": 5007 }, { "epoch": 0.39977648279715816, "grad_norm": 0.32456432615274644, "learning_rate": 1.3646074292758553e-05, "loss": 0.1724, "step": 5008 }, { "epoch": 0.3998563103696017, "grad_norm": 0.3953986328103173, "learning_rate": 1.3643666690391175e-05, "loss": 0.2268, "step": 5009 }, { "epoch": 0.3999361379420452, "grad_norm": 0.31243672653579635, "learning_rate": 1.3641258844459093e-05, "loss": 0.1769, "step": 5010 }, { "epoch": 0.4000159655144887, "grad_norm": 0.34323190398756387, "learning_rate": 1.363885075512327e-05, "loss": 0.2026, "step": 5011 }, { "epoch": 0.4000957930869322, "grad_norm": 0.28240874857549003, "learning_rate": 1.3636442422544673e-05, "loss": 0.1342, "step": 5012 }, { "epoch": 0.40017562065937573, "grad_norm": 0.29492219886736754, "learning_rate": 1.3634033846884292e-05, "loss": 0.2072, "step": 5013 }, { "epoch": 0.40025544823181924, "grad_norm": 0.27901901640966864, "learning_rate": 1.3631625028303133e-05, "loss": 0.1792, "step": 5014 }, { "epoch": 0.4003352758042628, "grad_norm": 0.3079262294692376, "learning_rate": 1.3629215966962209e-05, "loss": 0.2132, "step": 5015 }, { "epoch": 0.4004151033767063, "grad_norm": 0.32219920885853803, "learning_rate": 1.362680666302256e-05, "loss": 0.1731, "step": 5016 }, { "epoch": 0.40049493094914984, "grad_norm": 0.38827321923604385, "learning_rate": 1.3624397116645239e-05, "loss": 0.2109, "step": 5017 }, { "epoch": 0.40057475852159335, "grad_norm": 0.2737056641755315, "learning_rate": 1.3621987327991314e-05, "loss": 0.1869, "step": 5018 }, { "epoch": 0.40065458609403687, "grad_norm": 0.30120595873320727, "learning_rate": 1.3619577297221869e-05, "loss": 0.1381, "step": 5019 }, { "epoch": 0.4007344136664804, "grad_norm": 0.2930748936579194, "learning_rate": 1.3617167024498006e-05, "loss": 0.2114, "step": 5020 }, { "epoch": 0.40081424123892395, "grad_norm": 0.309291077288565, "learning_rate": 1.3614756509980841e-05, "loss": 0.1748, "step": 5021 }, { "epoch": 0.40089406881136747, "grad_norm": 0.28279424652598817, "learning_rate": 1.3612345753831513e-05, "loss": 0.2104, "step": 5022 }, { "epoch": 0.400973896383811, "grad_norm": 0.30096765092723293, "learning_rate": 1.3609934756211166e-05, "loss": 0.1366, "step": 5023 }, { "epoch": 0.4010537239562545, "grad_norm": 0.2763910069946794, "learning_rate": 1.3607523517280964e-05, "loss": 0.1857, "step": 5024 }, { "epoch": 0.401133551528698, "grad_norm": 0.26430823289229605, "learning_rate": 1.3605112037202091e-05, "loss": 0.1771, "step": 5025 }, { "epoch": 0.4012133791011415, "grad_norm": 0.32765075874953725, "learning_rate": 1.3602700316135745e-05, "loss": 0.1953, "step": 5026 }, { "epoch": 0.40129320667358503, "grad_norm": 0.35220384331717564, "learning_rate": 1.360028835424314e-05, "loss": 0.1988, "step": 5027 }, { "epoch": 0.4013730342460286, "grad_norm": 0.30170974273120216, "learning_rate": 1.359787615168551e-05, "loss": 0.1687, "step": 5028 }, { "epoch": 0.4014528618184721, "grad_norm": 0.2832235131660791, "learning_rate": 1.3595463708624097e-05, "loss": 0.2051, "step": 5029 }, { "epoch": 0.40153268939091563, "grad_norm": 0.3263457282605781, "learning_rate": 1.3593051025220162e-05, "loss": 0.1954, "step": 5030 }, { "epoch": 0.40161251696335915, "grad_norm": 0.2831236663421099, "learning_rate": 1.3590638101634986e-05, "loss": 0.2066, "step": 5031 }, { "epoch": 0.40169234453580266, "grad_norm": 0.29141185742214315, "learning_rate": 1.3588224938029862e-05, "loss": 0.1562, "step": 5032 }, { "epoch": 0.4017721721082462, "grad_norm": 0.2563609612259379, "learning_rate": 1.35858115345661e-05, "loss": 0.1638, "step": 5033 }, { "epoch": 0.4018519996806897, "grad_norm": 0.2922821382785215, "learning_rate": 1.3583397891405029e-05, "loss": 0.1539, "step": 5034 }, { "epoch": 0.40193182725313326, "grad_norm": 0.23831938172299583, "learning_rate": 1.3580984008707987e-05, "loss": 0.1646, "step": 5035 }, { "epoch": 0.40201165482557677, "grad_norm": 0.3711154049129611, "learning_rate": 1.3578569886636341e-05, "loss": 0.2184, "step": 5036 }, { "epoch": 0.4020914823980203, "grad_norm": 0.3150551481473373, "learning_rate": 1.3576155525351458e-05, "loss": 0.1879, "step": 5037 }, { "epoch": 0.4021713099704638, "grad_norm": 0.3205634674482388, "learning_rate": 1.3573740925014728e-05, "loss": 0.1883, "step": 5038 }, { "epoch": 0.4022511375429073, "grad_norm": 0.3277528497441036, "learning_rate": 1.3571326085787562e-05, "loss": 0.155, "step": 5039 }, { "epoch": 0.4023309651153508, "grad_norm": 0.3053237890800326, "learning_rate": 1.356891100783138e-05, "loss": 0.1926, "step": 5040 }, { "epoch": 0.40241079268779434, "grad_norm": 0.2747835011647008, "learning_rate": 1.356649569130762e-05, "loss": 0.1607, "step": 5041 }, { "epoch": 0.4024906202602379, "grad_norm": 0.32814797993438977, "learning_rate": 1.3564080136377737e-05, "loss": 0.1586, "step": 5042 }, { "epoch": 0.4025704478326814, "grad_norm": 0.3057298544137665, "learning_rate": 1.3561664343203202e-05, "loss": 0.1967, "step": 5043 }, { "epoch": 0.40265027540512494, "grad_norm": 0.3749763759971509, "learning_rate": 1.3559248311945502e-05, "loss": 0.1832, "step": 5044 }, { "epoch": 0.40273010297756845, "grad_norm": 0.2572435312298703, "learning_rate": 1.3556832042766139e-05, "loss": 0.2048, "step": 5045 }, { "epoch": 0.40280993055001196, "grad_norm": 0.2973749658012501, "learning_rate": 1.3554415535826627e-05, "loss": 0.1643, "step": 5046 }, { "epoch": 0.4028897581224555, "grad_norm": 0.3644055344264152, "learning_rate": 1.3551998791288502e-05, "loss": 0.2152, "step": 5047 }, { "epoch": 0.402969585694899, "grad_norm": 0.2859244996373782, "learning_rate": 1.3549581809313318e-05, "loss": 0.2163, "step": 5048 }, { "epoch": 0.40304941326734256, "grad_norm": 0.28060878040856857, "learning_rate": 1.3547164590062634e-05, "loss": 0.1706, "step": 5049 }, { "epoch": 0.4031292408397861, "grad_norm": 0.2740900498811022, "learning_rate": 1.3544747133698035e-05, "loss": 0.1047, "step": 5050 }, { "epoch": 0.4032090684122296, "grad_norm": 0.31933571854756165, "learning_rate": 1.3542329440381122e-05, "loss": 0.1582, "step": 5051 }, { "epoch": 0.4032888959846731, "grad_norm": 0.35773204192883024, "learning_rate": 1.35399115102735e-05, "loss": 0.1718, "step": 5052 }, { "epoch": 0.4033687235571166, "grad_norm": 0.32721878625942763, "learning_rate": 1.3537493343536805e-05, "loss": 0.154, "step": 5053 }, { "epoch": 0.40344855112956013, "grad_norm": 0.3031511201221933, "learning_rate": 1.3535074940332677e-05, "loss": 0.1866, "step": 5054 }, { "epoch": 0.4035283787020037, "grad_norm": 0.2904290698367213, "learning_rate": 1.3532656300822782e-05, "loss": 0.1572, "step": 5055 }, { "epoch": 0.4036082062744472, "grad_norm": 0.2960481592176965, "learning_rate": 1.353023742516879e-05, "loss": 0.1782, "step": 5056 }, { "epoch": 0.4036880338468907, "grad_norm": 0.2725364798367584, "learning_rate": 1.3527818313532398e-05, "loss": 0.1569, "step": 5057 }, { "epoch": 0.40376786141933424, "grad_norm": 0.2914323594962801, "learning_rate": 1.3525398966075314e-05, "loss": 0.1673, "step": 5058 }, { "epoch": 0.40384768899177775, "grad_norm": 0.3204547053244419, "learning_rate": 1.352297938295926e-05, "loss": 0.2038, "step": 5059 }, { "epoch": 0.40392751656422127, "grad_norm": 0.37530383170865395, "learning_rate": 1.3520559564345979e-05, "loss": 0.152, "step": 5060 }, { "epoch": 0.4040073441366648, "grad_norm": 0.30880508409436336, "learning_rate": 1.351813951039722e-05, "loss": 0.1847, "step": 5061 }, { "epoch": 0.40408717170910835, "grad_norm": 0.2957120467339259, "learning_rate": 1.3515719221274757e-05, "loss": 0.1571, "step": 5062 }, { "epoch": 0.40416699928155186, "grad_norm": 0.2896389594300641, "learning_rate": 1.3513298697140378e-05, "loss": 0.146, "step": 5063 }, { "epoch": 0.4042468268539954, "grad_norm": 0.3547576392424474, "learning_rate": 1.3510877938155886e-05, "loss": 0.2127, "step": 5064 }, { "epoch": 0.4043266544264389, "grad_norm": 0.27809497672863603, "learning_rate": 1.3508456944483094e-05, "loss": 0.1804, "step": 5065 }, { "epoch": 0.4044064819988824, "grad_norm": 0.2807624446410633, "learning_rate": 1.3506035716283845e-05, "loss": 0.188, "step": 5066 }, { "epoch": 0.4044863095713259, "grad_norm": 0.28398711876256044, "learning_rate": 1.350361425371998e-05, "loss": 0.154, "step": 5067 }, { "epoch": 0.40456613714376943, "grad_norm": 0.2492972556674203, "learning_rate": 1.3501192556953369e-05, "loss": 0.2033, "step": 5068 }, { "epoch": 0.404645964716213, "grad_norm": 0.3023034461346888, "learning_rate": 1.3498770626145892e-05, "loss": 0.1678, "step": 5069 }, { "epoch": 0.4047257922886565, "grad_norm": 0.3014256389717977, "learning_rate": 1.3496348461459441e-05, "loss": 0.1377, "step": 5070 }, { "epoch": 0.40480561986110003, "grad_norm": 0.2917967266141971, "learning_rate": 1.3493926063055935e-05, "loss": 0.2074, "step": 5071 }, { "epoch": 0.40488544743354354, "grad_norm": 0.3183160157933834, "learning_rate": 1.3491503431097298e-05, "loss": 0.1529, "step": 5072 }, { "epoch": 0.40496527500598706, "grad_norm": 0.34445272949950245, "learning_rate": 1.3489080565745474e-05, "loss": 0.1694, "step": 5073 }, { "epoch": 0.40504510257843057, "grad_norm": 0.33782338041627646, "learning_rate": 1.3486657467162423e-05, "loss": 0.1785, "step": 5074 }, { "epoch": 0.4051249301508741, "grad_norm": 0.33428766934250986, "learning_rate": 1.3484234135510117e-05, "loss": 0.1422, "step": 5075 }, { "epoch": 0.40520475772331765, "grad_norm": 0.34209645012678214, "learning_rate": 1.3481810570950548e-05, "loss": 0.176, "step": 5076 }, { "epoch": 0.40528458529576117, "grad_norm": 0.31431119102229527, "learning_rate": 1.3479386773645721e-05, "loss": 0.2158, "step": 5077 }, { "epoch": 0.4053644128682047, "grad_norm": 0.33601451755730777, "learning_rate": 1.3476962743757658e-05, "loss": 0.2132, "step": 5078 }, { "epoch": 0.4054442404406482, "grad_norm": 0.2737931515580515, "learning_rate": 1.3474538481448396e-05, "loss": 0.1618, "step": 5079 }, { "epoch": 0.4055240680130917, "grad_norm": 0.30381193553923186, "learning_rate": 1.3472113986879984e-05, "loss": 0.1501, "step": 5080 }, { "epoch": 0.4056038955855352, "grad_norm": 0.32527689804082976, "learning_rate": 1.3469689260214495e-05, "loss": 0.1809, "step": 5081 }, { "epoch": 0.4056837231579788, "grad_norm": 0.32681110347362574, "learning_rate": 1.3467264301614011e-05, "loss": 0.2283, "step": 5082 }, { "epoch": 0.4057635507304223, "grad_norm": 0.3340032687576286, "learning_rate": 1.3464839111240629e-05, "loss": 0.1478, "step": 5083 }, { "epoch": 0.4058433783028658, "grad_norm": 0.34003011084062473, "learning_rate": 1.3462413689256465e-05, "loss": 0.188, "step": 5084 }, { "epoch": 0.40592320587530933, "grad_norm": 0.3013985328185682, "learning_rate": 1.3459988035823643e-05, "loss": 0.193, "step": 5085 }, { "epoch": 0.40600303344775285, "grad_norm": 0.28340358523846465, "learning_rate": 1.3457562151104319e-05, "loss": 0.219, "step": 5086 }, { "epoch": 0.40608286102019636, "grad_norm": 0.3250067735012022, "learning_rate": 1.3455136035260644e-05, "loss": 0.166, "step": 5087 }, { "epoch": 0.4061626885926399, "grad_norm": 0.27468612172836787, "learning_rate": 1.3452709688454804e-05, "loss": 0.1606, "step": 5088 }, { "epoch": 0.40624251616508344, "grad_norm": 0.2846152401348977, "learning_rate": 1.3450283110848983e-05, "loss": 0.157, "step": 5089 }, { "epoch": 0.40632234373752696, "grad_norm": 0.2823336138669476, "learning_rate": 1.3447856302605387e-05, "loss": 0.2024, "step": 5090 }, { "epoch": 0.4064021713099705, "grad_norm": 0.3549045396997234, "learning_rate": 1.3445429263886245e-05, "loss": 0.1982, "step": 5091 }, { "epoch": 0.406481998882414, "grad_norm": 0.323465690800025, "learning_rate": 1.344300199485379e-05, "loss": 0.1884, "step": 5092 }, { "epoch": 0.4065618264548575, "grad_norm": 0.35385845299344254, "learning_rate": 1.344057449567028e-05, "loss": 0.1362, "step": 5093 }, { "epoch": 0.406641654027301, "grad_norm": 0.3154641926627621, "learning_rate": 1.3438146766497976e-05, "loss": 0.2215, "step": 5094 }, { "epoch": 0.4067214815997445, "grad_norm": 0.2869187821251008, "learning_rate": 1.343571880749917e-05, "loss": 0.159, "step": 5095 }, { "epoch": 0.4068013091721881, "grad_norm": 0.2652387311449919, "learning_rate": 1.3433290618836159e-05, "loss": 0.1526, "step": 5096 }, { "epoch": 0.4068811367446316, "grad_norm": 0.2626257850121777, "learning_rate": 1.3430862200671258e-05, "loss": 0.2237, "step": 5097 }, { "epoch": 0.4069609643170751, "grad_norm": 0.2986285766000145, "learning_rate": 1.3428433553166791e-05, "loss": 0.1655, "step": 5098 }, { "epoch": 0.40704079188951864, "grad_norm": 0.3020106656183207, "learning_rate": 1.3426004676485114e-05, "loss": 0.1642, "step": 5099 }, { "epoch": 0.40712061946196215, "grad_norm": 0.3296264653043328, "learning_rate": 1.342357557078858e-05, "loss": 0.1739, "step": 5100 }, { "epoch": 0.40720044703440567, "grad_norm": 0.3461444670632954, "learning_rate": 1.3421146236239568e-05, "loss": 0.2121, "step": 5101 }, { "epoch": 0.4072802746068492, "grad_norm": 0.296284649878389, "learning_rate": 1.3418716673000466e-05, "loss": 0.2219, "step": 5102 }, { "epoch": 0.40736010217929275, "grad_norm": 0.2727666498543391, "learning_rate": 1.341628688123369e-05, "loss": 0.1956, "step": 5103 }, { "epoch": 0.40743992975173626, "grad_norm": 0.2891690120983624, "learning_rate": 1.3413856861101653e-05, "loss": 0.2024, "step": 5104 }, { "epoch": 0.4075197573241798, "grad_norm": 0.2560728390726239, "learning_rate": 1.3411426612766793e-05, "loss": 0.1648, "step": 5105 }, { "epoch": 0.4075995848966233, "grad_norm": 0.2843961930960445, "learning_rate": 1.3408996136391568e-05, "loss": 0.2235, "step": 5106 }, { "epoch": 0.4076794124690668, "grad_norm": 0.2717551171008426, "learning_rate": 1.3406565432138438e-05, "loss": 0.1792, "step": 5107 }, { "epoch": 0.4077592400415103, "grad_norm": 0.3104321967416396, "learning_rate": 1.3404134500169895e-05, "loss": 0.1578, "step": 5108 }, { "epoch": 0.4078390676139539, "grad_norm": 0.28292886826788904, "learning_rate": 1.3401703340648427e-05, "loss": 0.1493, "step": 5109 }, { "epoch": 0.4079188951863974, "grad_norm": 0.3099392207967549, "learning_rate": 1.3399271953736555e-05, "loss": 0.1667, "step": 5110 }, { "epoch": 0.4079987227588409, "grad_norm": 0.3036393234325706, "learning_rate": 1.3396840339596806e-05, "loss": 0.2007, "step": 5111 }, { "epoch": 0.40807855033128443, "grad_norm": 0.29399663730985326, "learning_rate": 1.3394408498391722e-05, "loss": 0.1357, "step": 5112 }, { "epoch": 0.40815837790372794, "grad_norm": 0.3405270122003677, "learning_rate": 1.3391976430283861e-05, "loss": 0.1841, "step": 5113 }, { "epoch": 0.40823820547617146, "grad_norm": 0.2957214058932895, "learning_rate": 1.3389544135435801e-05, "loss": 0.1351, "step": 5114 }, { "epoch": 0.40831803304861497, "grad_norm": 0.2836243208391738, "learning_rate": 1.338711161401013e-05, "loss": 0.1458, "step": 5115 }, { "epoch": 0.40839786062105854, "grad_norm": 0.30002310457727244, "learning_rate": 1.3384678866169452e-05, "loss": 0.2104, "step": 5116 }, { "epoch": 0.40847768819350205, "grad_norm": 0.32411339818298435, "learning_rate": 1.3382245892076386e-05, "loss": 0.1903, "step": 5117 }, { "epoch": 0.40855751576594557, "grad_norm": 0.29008833957918734, "learning_rate": 1.3379812691893567e-05, "loss": 0.1785, "step": 5118 }, { "epoch": 0.4086373433383891, "grad_norm": 0.33221821666947154, "learning_rate": 1.3377379265783647e-05, "loss": 0.191, "step": 5119 }, { "epoch": 0.4087171709108326, "grad_norm": 0.286266468386783, "learning_rate": 1.3374945613909286e-05, "loss": 0.1998, "step": 5120 }, { "epoch": 0.4087969984832761, "grad_norm": 0.25728483987468814, "learning_rate": 1.337251173643317e-05, "loss": 0.1642, "step": 5121 }, { "epoch": 0.4088768260557196, "grad_norm": 0.24187494027359221, "learning_rate": 1.3370077633517986e-05, "loss": 0.1641, "step": 5122 }, { "epoch": 0.4089566536281632, "grad_norm": 0.27324932281703, "learning_rate": 1.3367643305326453e-05, "loss": 0.1469, "step": 5123 }, { "epoch": 0.4090364812006067, "grad_norm": 0.3518472948632042, "learning_rate": 1.3365208752021288e-05, "loss": 0.1887, "step": 5124 }, { "epoch": 0.4091163087730502, "grad_norm": 0.2893016326833935, "learning_rate": 1.3362773973765239e-05, "loss": 0.2075, "step": 5125 }, { "epoch": 0.40919613634549373, "grad_norm": 0.30668166540792224, "learning_rate": 1.336033897072106e-05, "loss": 0.209, "step": 5126 }, { "epoch": 0.40927596391793725, "grad_norm": 0.28359151374797564, "learning_rate": 1.3357903743051512e-05, "loss": 0.1743, "step": 5127 }, { "epoch": 0.40935579149038076, "grad_norm": 0.28743960011714986, "learning_rate": 1.3355468290919394e-05, "loss": 0.1811, "step": 5128 }, { "epoch": 0.4094356190628243, "grad_norm": 0.3037543825873866, "learning_rate": 1.3353032614487495e-05, "loss": 0.2011, "step": 5129 }, { "epoch": 0.40951544663526784, "grad_norm": 0.32020900116407053, "learning_rate": 1.3350596713918638e-05, "loss": 0.1647, "step": 5130 }, { "epoch": 0.40959527420771136, "grad_norm": 0.3266205020375201, "learning_rate": 1.334816058937565e-05, "loss": 0.2107, "step": 5131 }, { "epoch": 0.40967510178015487, "grad_norm": 0.25676004835960065, "learning_rate": 1.3345724241021376e-05, "loss": 0.2314, "step": 5132 }, { "epoch": 0.4097549293525984, "grad_norm": 0.3063033709521779, "learning_rate": 1.3343287669018678e-05, "loss": 0.1501, "step": 5133 }, { "epoch": 0.4098347569250419, "grad_norm": 0.31659773986748757, "learning_rate": 1.334085087353043e-05, "loss": 0.1472, "step": 5134 }, { "epoch": 0.4099145844974854, "grad_norm": 0.3064548006800501, "learning_rate": 1.333841385471952e-05, "loss": 0.1692, "step": 5135 }, { "epoch": 0.4099944120699289, "grad_norm": 0.2949667534737362, "learning_rate": 1.3335976612748857e-05, "loss": 0.186, "step": 5136 }, { "epoch": 0.4100742396423725, "grad_norm": 0.29048397596719083, "learning_rate": 1.3333539147781361e-05, "loss": 0.1934, "step": 5137 }, { "epoch": 0.410154067214816, "grad_norm": 0.2978010708683964, "learning_rate": 1.333110145997996e-05, "loss": 0.1749, "step": 5138 }, { "epoch": 0.4102338947872595, "grad_norm": 0.3188071919752806, "learning_rate": 1.3328663549507614e-05, "loss": 0.1615, "step": 5139 }, { "epoch": 0.41031372235970304, "grad_norm": 0.32472958064212615, "learning_rate": 1.332622541652728e-05, "loss": 0.1728, "step": 5140 }, { "epoch": 0.41039354993214655, "grad_norm": 0.30892293311469404, "learning_rate": 1.3323787061201944e-05, "loss": 0.1971, "step": 5141 }, { "epoch": 0.41047337750459006, "grad_norm": 0.3421389766250007, "learning_rate": 1.3321348483694594e-05, "loss": 0.1975, "step": 5142 }, { "epoch": 0.41055320507703363, "grad_norm": 0.2995308874993135, "learning_rate": 1.3318909684168242e-05, "loss": 0.1758, "step": 5143 }, { "epoch": 0.41063303264947715, "grad_norm": 0.36770943986426097, "learning_rate": 1.331647066278591e-05, "loss": 0.1314, "step": 5144 }, { "epoch": 0.41071286022192066, "grad_norm": 0.29313214493577483, "learning_rate": 1.3314031419710643e-05, "loss": 0.1652, "step": 5145 }, { "epoch": 0.4107926877943642, "grad_norm": 0.3012170781873028, "learning_rate": 1.3311591955105488e-05, "loss": 0.2317, "step": 5146 }, { "epoch": 0.4108725153668077, "grad_norm": 0.2719818941339445, "learning_rate": 1.3309152269133516e-05, "loss": 0.2094, "step": 5147 }, { "epoch": 0.4109523429392512, "grad_norm": 0.35359003377778186, "learning_rate": 1.3306712361957812e-05, "loss": 0.17, "step": 5148 }, { "epoch": 0.4110321705116947, "grad_norm": 0.32490979429373706, "learning_rate": 1.3304272233741472e-05, "loss": 0.1448, "step": 5149 }, { "epoch": 0.4111119980841383, "grad_norm": 0.26755670210902255, "learning_rate": 1.3301831884647607e-05, "loss": 0.1714, "step": 5150 }, { "epoch": 0.4111918256565818, "grad_norm": 0.3029209707115197, "learning_rate": 1.329939131483935e-05, "loss": 0.2079, "step": 5151 }, { "epoch": 0.4112716532290253, "grad_norm": 0.33066165671584086, "learning_rate": 1.329695052447984e-05, "loss": 0.2369, "step": 5152 }, { "epoch": 0.4113514808014688, "grad_norm": 0.2834766675646414, "learning_rate": 1.3294509513732234e-05, "loss": 0.1233, "step": 5153 }, { "epoch": 0.41143130837391234, "grad_norm": 0.31768256570616754, "learning_rate": 1.3292068282759707e-05, "loss": 0.1464, "step": 5154 }, { "epoch": 0.41151113594635585, "grad_norm": 0.2926130547692016, "learning_rate": 1.3289626831725439e-05, "loss": 0.1911, "step": 5155 }, { "epoch": 0.41159096351879937, "grad_norm": 0.2820036808271428, "learning_rate": 1.3287185160792642e-05, "loss": 0.1806, "step": 5156 }, { "epoch": 0.41167079109124294, "grad_norm": 0.2868391115076352, "learning_rate": 1.3284743270124522e-05, "loss": 0.2148, "step": 5157 }, { "epoch": 0.41175061866368645, "grad_norm": 0.3265877106942611, "learning_rate": 1.3282301159884312e-05, "loss": 0.1892, "step": 5158 }, { "epoch": 0.41183044623612997, "grad_norm": 0.305467035279008, "learning_rate": 1.3279858830235264e-05, "loss": 0.1826, "step": 5159 }, { "epoch": 0.4119102738085735, "grad_norm": 0.2557738724302476, "learning_rate": 1.3277416281340628e-05, "loss": 0.1807, "step": 5160 }, { "epoch": 0.411990101381017, "grad_norm": 0.28118817431689247, "learning_rate": 1.3274973513363686e-05, "loss": 0.2152, "step": 5161 }, { "epoch": 0.4120699289534605, "grad_norm": 0.30825171135673285, "learning_rate": 1.3272530526467724e-05, "loss": 0.1496, "step": 5162 }, { "epoch": 0.412149756525904, "grad_norm": 0.2767107579209771, "learning_rate": 1.327008732081605e-05, "loss": 0.1909, "step": 5163 }, { "epoch": 0.4122295840983476, "grad_norm": 0.32848261269816975, "learning_rate": 1.3267643896571975e-05, "loss": 0.2085, "step": 5164 }, { "epoch": 0.4123094116707911, "grad_norm": 0.2628031291073059, "learning_rate": 1.3265200253898842e-05, "loss": 0.15, "step": 5165 }, { "epoch": 0.4123892392432346, "grad_norm": 0.3025463908044899, "learning_rate": 1.326275639295999e-05, "loss": 0.1997, "step": 5166 }, { "epoch": 0.41246906681567813, "grad_norm": 0.26950359975179006, "learning_rate": 1.3260312313918788e-05, "loss": 0.1971, "step": 5167 }, { "epoch": 0.41254889438812165, "grad_norm": 0.2690926874204431, "learning_rate": 1.3257868016938607e-05, "loss": 0.1913, "step": 5168 }, { "epoch": 0.41262872196056516, "grad_norm": 0.30792054513349126, "learning_rate": 1.3255423502182844e-05, "loss": 0.1495, "step": 5169 }, { "epoch": 0.41270854953300873, "grad_norm": 0.37173702121450536, "learning_rate": 1.3252978769814901e-05, "loss": 0.1832, "step": 5170 }, { "epoch": 0.41278837710545224, "grad_norm": 0.30125460548411204, "learning_rate": 1.3250533819998203e-05, "loss": 0.1431, "step": 5171 }, { "epoch": 0.41286820467789576, "grad_norm": 0.3785061709849288, "learning_rate": 1.3248088652896186e-05, "loss": 0.1714, "step": 5172 }, { "epoch": 0.41294803225033927, "grad_norm": 0.2873240058771802, "learning_rate": 1.324564326867229e-05, "loss": 0.1774, "step": 5173 }, { "epoch": 0.4130278598227828, "grad_norm": 0.2780767646094572, "learning_rate": 1.3243197667489991e-05, "loss": 0.165, "step": 5174 }, { "epoch": 0.4131076873952263, "grad_norm": 0.2938522639064932, "learning_rate": 1.3240751849512758e-05, "loss": 0.1482, "step": 5175 }, { "epoch": 0.4131875149676698, "grad_norm": 0.35144345215383427, "learning_rate": 1.3238305814904092e-05, "loss": 0.1698, "step": 5176 }, { "epoch": 0.4132673425401134, "grad_norm": 0.3530521176083807, "learning_rate": 1.3235859563827496e-05, "loss": 0.1676, "step": 5177 }, { "epoch": 0.4133471701125569, "grad_norm": 0.2994966840720263, "learning_rate": 1.3233413096446495e-05, "loss": 0.1939, "step": 5178 }, { "epoch": 0.4134269976850004, "grad_norm": 0.29091749087642077, "learning_rate": 1.3230966412924624e-05, "loss": 0.2044, "step": 5179 }, { "epoch": 0.4135068252574439, "grad_norm": 0.2880518722750762, "learning_rate": 1.3228519513425435e-05, "loss": 0.1616, "step": 5180 }, { "epoch": 0.41358665282988744, "grad_norm": 0.41191572586789804, "learning_rate": 1.3226072398112492e-05, "loss": 0.1861, "step": 5181 }, { "epoch": 0.41366648040233095, "grad_norm": 0.3456461099631131, "learning_rate": 1.3223625067149377e-05, "loss": 0.1357, "step": 5182 }, { "epoch": 0.41374630797477446, "grad_norm": 0.31608670802878613, "learning_rate": 1.3221177520699684e-05, "loss": 0.1851, "step": 5183 }, { "epoch": 0.41382613554721803, "grad_norm": 0.28966922965335656, "learning_rate": 1.3218729758927018e-05, "loss": 0.1498, "step": 5184 }, { "epoch": 0.41390596311966155, "grad_norm": 0.31092161702700777, "learning_rate": 1.3216281781995014e-05, "loss": 0.1516, "step": 5185 }, { "epoch": 0.41398579069210506, "grad_norm": 0.2861571439100369, "learning_rate": 1.3213833590067294e-05, "loss": 0.1603, "step": 5186 }, { "epoch": 0.4140656182645486, "grad_norm": 0.3426956872808428, "learning_rate": 1.3211385183307517e-05, "loss": 0.1938, "step": 5187 }, { "epoch": 0.4141454458369921, "grad_norm": 0.30521366361113356, "learning_rate": 1.3208936561879351e-05, "loss": 0.1778, "step": 5188 }, { "epoch": 0.4142252734094356, "grad_norm": 0.30205024300420624, "learning_rate": 1.3206487725946475e-05, "loss": 0.2243, "step": 5189 }, { "epoch": 0.4143051009818791, "grad_norm": 0.2805696018595688, "learning_rate": 1.3204038675672581e-05, "loss": 0.1863, "step": 5190 }, { "epoch": 0.4143849285543227, "grad_norm": 0.3424906532628756, "learning_rate": 1.3201589411221385e-05, "loss": 0.1605, "step": 5191 }, { "epoch": 0.4144647561267662, "grad_norm": 0.39078568626090004, "learning_rate": 1.3199139932756605e-05, "loss": 0.1932, "step": 5192 }, { "epoch": 0.4145445836992097, "grad_norm": 0.3434119867629946, "learning_rate": 1.3196690240441982e-05, "loss": 0.1646, "step": 5193 }, { "epoch": 0.4146244112716532, "grad_norm": 0.3105059589062598, "learning_rate": 1.3194240334441268e-05, "loss": 0.1939, "step": 5194 }, { "epoch": 0.41470423884409674, "grad_norm": 0.33372602307212856, "learning_rate": 1.3191790214918226e-05, "loss": 0.1619, "step": 5195 }, { "epoch": 0.41478406641654025, "grad_norm": 0.32090496314290246, "learning_rate": 1.3189339882036642e-05, "loss": 0.1394, "step": 5196 }, { "epoch": 0.41486389398898377, "grad_norm": 0.28339312420218754, "learning_rate": 1.3186889335960305e-05, "loss": 0.1864, "step": 5197 }, { "epoch": 0.41494372156142734, "grad_norm": 0.29506769798125143, "learning_rate": 1.3184438576853031e-05, "loss": 0.1512, "step": 5198 }, { "epoch": 0.41502354913387085, "grad_norm": 0.31332876692528494, "learning_rate": 1.3181987604878636e-05, "loss": 0.1505, "step": 5199 }, { "epoch": 0.41510337670631436, "grad_norm": 0.38870691306389027, "learning_rate": 1.317953642020097e-05, "loss": 0.2113, "step": 5200 }, { "epoch": 0.4151832042787579, "grad_norm": 0.28255670101738795, "learning_rate": 1.3177085022983872e-05, "loss": 0.169, "step": 5201 }, { "epoch": 0.4152630318512014, "grad_norm": 0.3400470950152951, "learning_rate": 1.317463341339121e-05, "loss": 0.1575, "step": 5202 }, { "epoch": 0.4153428594236449, "grad_norm": 0.33707346308497965, "learning_rate": 1.3172181591586872e-05, "loss": 0.1611, "step": 5203 }, { "epoch": 0.4154226869960885, "grad_norm": 0.31477057361653416, "learning_rate": 1.3169729557734748e-05, "loss": 0.1754, "step": 5204 }, { "epoch": 0.415502514568532, "grad_norm": 0.34916287587339667, "learning_rate": 1.3167277311998747e-05, "loss": 0.1739, "step": 5205 }, { "epoch": 0.4155823421409755, "grad_norm": 0.3317871566502543, "learning_rate": 1.3164824854542791e-05, "loss": 0.142, "step": 5206 }, { "epoch": 0.415662169713419, "grad_norm": 0.3883169112804482, "learning_rate": 1.3162372185530818e-05, "loss": 0.1593, "step": 5207 }, { "epoch": 0.41574199728586253, "grad_norm": 0.34036733874435915, "learning_rate": 1.315991930512678e-05, "loss": 0.2388, "step": 5208 }, { "epoch": 0.41582182485830604, "grad_norm": 0.3075754690377712, "learning_rate": 1.3157466213494643e-05, "loss": 0.2056, "step": 5209 }, { "epoch": 0.41590165243074956, "grad_norm": 0.3487794083892346, "learning_rate": 1.3155012910798382e-05, "loss": 0.2039, "step": 5210 }, { "epoch": 0.4159814800031931, "grad_norm": 0.3077807690139585, "learning_rate": 1.3152559397201995e-05, "loss": 0.1945, "step": 5211 }, { "epoch": 0.41606130757563664, "grad_norm": 0.3106626299209153, "learning_rate": 1.3150105672869489e-05, "loss": 0.1437, "step": 5212 }, { "epoch": 0.41614113514808015, "grad_norm": 0.3661276445179104, "learning_rate": 1.3147651737964884e-05, "loss": 0.1761, "step": 5213 }, { "epoch": 0.41622096272052367, "grad_norm": 0.30733627871223157, "learning_rate": 1.3145197592652217e-05, "loss": 0.1883, "step": 5214 }, { "epoch": 0.4163007902929672, "grad_norm": 0.3138442317173802, "learning_rate": 1.3142743237095538e-05, "loss": 0.1809, "step": 5215 }, { "epoch": 0.4163806178654107, "grad_norm": 0.28346305176660014, "learning_rate": 1.3140288671458912e-05, "loss": 0.1424, "step": 5216 }, { "epoch": 0.4164604454378542, "grad_norm": 0.3565940928171859, "learning_rate": 1.3137833895906413e-05, "loss": 0.2097, "step": 5217 }, { "epoch": 0.4165402730102978, "grad_norm": 0.25546767854249114, "learning_rate": 1.313537891060214e-05, "loss": 0.171, "step": 5218 }, { "epoch": 0.4166201005827413, "grad_norm": 0.36333292475315304, "learning_rate": 1.3132923715710191e-05, "loss": 0.1559, "step": 5219 }, { "epoch": 0.4166999281551848, "grad_norm": 0.3167389211345519, "learning_rate": 1.313046831139469e-05, "loss": 0.192, "step": 5220 }, { "epoch": 0.4167797557276283, "grad_norm": 0.319813139419999, "learning_rate": 1.3128012697819772e-05, "loss": 0.1806, "step": 5221 }, { "epoch": 0.41685958330007183, "grad_norm": 0.27496223831733474, "learning_rate": 1.3125556875149586e-05, "loss": 0.1976, "step": 5222 }, { "epoch": 0.41693941087251535, "grad_norm": 0.2771023286096936, "learning_rate": 1.312310084354829e-05, "loss": 0.1768, "step": 5223 }, { "epoch": 0.41701923844495886, "grad_norm": 0.2971514612313518, "learning_rate": 1.312064460318006e-05, "loss": 0.2063, "step": 5224 }, { "epoch": 0.41709906601740243, "grad_norm": 0.32843442543505424, "learning_rate": 1.3118188154209094e-05, "loss": 0.1665, "step": 5225 }, { "epoch": 0.41717889358984594, "grad_norm": 0.29101945968293924, "learning_rate": 1.3115731496799583e-05, "loss": 0.1422, "step": 5226 }, { "epoch": 0.41725872116228946, "grad_norm": 0.3644585493826029, "learning_rate": 1.3113274631115756e-05, "loss": 0.177, "step": 5227 }, { "epoch": 0.417338548734733, "grad_norm": 0.33388235389688303, "learning_rate": 1.3110817557321838e-05, "loss": 0.1909, "step": 5228 }, { "epoch": 0.4174183763071765, "grad_norm": 0.3479761859350015, "learning_rate": 1.310836027558208e-05, "loss": 0.192, "step": 5229 }, { "epoch": 0.41749820387962, "grad_norm": 0.3319621268885126, "learning_rate": 1.3105902786060736e-05, "loss": 0.1504, "step": 5230 }, { "epoch": 0.41757803145206357, "grad_norm": 0.30320758441623247, "learning_rate": 1.3103445088922085e-05, "loss": 0.2068, "step": 5231 }, { "epoch": 0.4176578590245071, "grad_norm": 0.31783126556787855, "learning_rate": 1.3100987184330407e-05, "loss": 0.18, "step": 5232 }, { "epoch": 0.4177376865969506, "grad_norm": 0.2729032125174307, "learning_rate": 1.3098529072450012e-05, "loss": 0.1372, "step": 5233 }, { "epoch": 0.4178175141693941, "grad_norm": 0.2789401091342425, "learning_rate": 1.3096070753445206e-05, "loss": 0.1721, "step": 5234 }, { "epoch": 0.4178973417418376, "grad_norm": 0.3590088201733822, "learning_rate": 1.3093612227480328e-05, "loss": 0.1712, "step": 5235 }, { "epoch": 0.41797716931428114, "grad_norm": 0.2755983202551283, "learning_rate": 1.3091153494719712e-05, "loss": 0.1562, "step": 5236 }, { "epoch": 0.41805699688672465, "grad_norm": 0.279821343554762, "learning_rate": 1.3088694555327718e-05, "loss": 0.1519, "step": 5237 }, { "epoch": 0.4181368244591682, "grad_norm": 0.3016320633788663, "learning_rate": 1.308623540946872e-05, "loss": 0.2009, "step": 5238 }, { "epoch": 0.41821665203161174, "grad_norm": 0.30941059272553484, "learning_rate": 1.3083776057307094e-05, "loss": 0.2167, "step": 5239 }, { "epoch": 0.41829647960405525, "grad_norm": 0.3265696672187723, "learning_rate": 1.3081316499007243e-05, "loss": 0.141, "step": 5240 }, { "epoch": 0.41837630717649876, "grad_norm": 0.2827785579398617, "learning_rate": 1.3078856734733579e-05, "loss": 0.1492, "step": 5241 }, { "epoch": 0.4184561347489423, "grad_norm": 0.2835817617375537, "learning_rate": 1.3076396764650526e-05, "loss": 0.1439, "step": 5242 }, { "epoch": 0.4185359623213858, "grad_norm": 0.29138527544512055, "learning_rate": 1.3073936588922522e-05, "loss": 0.2165, "step": 5243 }, { "epoch": 0.4186157898938293, "grad_norm": 0.3336309585453976, "learning_rate": 1.3071476207714027e-05, "loss": 0.2148, "step": 5244 }, { "epoch": 0.4186956174662729, "grad_norm": 0.33793219211847664, "learning_rate": 1.30690156211895e-05, "loss": 0.1977, "step": 5245 }, { "epoch": 0.4187754450387164, "grad_norm": 0.2698654493367616, "learning_rate": 1.3066554829513425e-05, "loss": 0.2163, "step": 5246 }, { "epoch": 0.4188552726111599, "grad_norm": 0.27077433143197843, "learning_rate": 1.3064093832850294e-05, "loss": 0.1653, "step": 5247 }, { "epoch": 0.4189351001836034, "grad_norm": 0.30167343537136276, "learning_rate": 1.3061632631364613e-05, "loss": 0.1363, "step": 5248 }, { "epoch": 0.41901492775604693, "grad_norm": 0.31074827427473495, "learning_rate": 1.3059171225220912e-05, "loss": 0.2681, "step": 5249 }, { "epoch": 0.41909475532849044, "grad_norm": 0.3010817240063485, "learning_rate": 1.3056709614583718e-05, "loss": 0.207, "step": 5250 }, { "epoch": 0.41917458290093396, "grad_norm": 0.3851445737552893, "learning_rate": 1.3054247799617586e-05, "loss": 0.1663, "step": 5251 }, { "epoch": 0.4192544104733775, "grad_norm": 0.2755907386911619, "learning_rate": 1.3051785780487074e-05, "loss": 0.1465, "step": 5252 }, { "epoch": 0.41933423804582104, "grad_norm": 0.28650346857118025, "learning_rate": 1.3049323557356761e-05, "loss": 0.1784, "step": 5253 }, { "epoch": 0.41941406561826455, "grad_norm": 0.28524039062532675, "learning_rate": 1.3046861130391232e-05, "loss": 0.2257, "step": 5254 }, { "epoch": 0.41949389319070807, "grad_norm": 0.33625246753359417, "learning_rate": 1.3044398499755098e-05, "loss": 0.2342, "step": 5255 }, { "epoch": 0.4195737207631516, "grad_norm": 0.31227051513013987, "learning_rate": 1.304193566561297e-05, "loss": 0.1967, "step": 5256 }, { "epoch": 0.4196535483355951, "grad_norm": 0.29066498541520513, "learning_rate": 1.3039472628129484e-05, "loss": 0.2063, "step": 5257 }, { "epoch": 0.41973337590803866, "grad_norm": 0.3301725048368017, "learning_rate": 1.3037009387469274e-05, "loss": 0.1843, "step": 5258 }, { "epoch": 0.4198132034804822, "grad_norm": 0.30969891149682666, "learning_rate": 1.3034545943797012e-05, "loss": 0.1746, "step": 5259 }, { "epoch": 0.4198930310529257, "grad_norm": 0.33208587849275784, "learning_rate": 1.3032082297277362e-05, "loss": 0.2076, "step": 5260 }, { "epoch": 0.4199728586253692, "grad_norm": 0.2918233049855495, "learning_rate": 1.3029618448075007e-05, "loss": 0.1562, "step": 5261 }, { "epoch": 0.4200526861978127, "grad_norm": 0.2786372901734323, "learning_rate": 1.3027154396354652e-05, "loss": 0.1856, "step": 5262 }, { "epoch": 0.42013251377025623, "grad_norm": 0.24717979058923797, "learning_rate": 1.3024690142281e-05, "loss": 0.1664, "step": 5263 }, { "epoch": 0.42021234134269975, "grad_norm": 0.3622996862723429, "learning_rate": 1.3022225686018786e-05, "loss": 0.1787, "step": 5264 }, { "epoch": 0.4202921689151433, "grad_norm": 0.358328748615289, "learning_rate": 1.3019761027732743e-05, "loss": 0.1765, "step": 5265 }, { "epoch": 0.42037199648758683, "grad_norm": 0.2999256230313618, "learning_rate": 1.3017296167587627e-05, "loss": 0.2407, "step": 5266 }, { "epoch": 0.42045182406003034, "grad_norm": 0.32115241038976095, "learning_rate": 1.3014831105748205e-05, "loss": 0.1597, "step": 5267 }, { "epoch": 0.42053165163247386, "grad_norm": 0.32202535321815534, "learning_rate": 1.3012365842379254e-05, "loss": 0.2234, "step": 5268 }, { "epoch": 0.42061147920491737, "grad_norm": 0.25503476319509066, "learning_rate": 1.3009900377645564e-05, "loss": 0.1933, "step": 5269 }, { "epoch": 0.4206913067773609, "grad_norm": 0.2868557792882676, "learning_rate": 1.3007434711711944e-05, "loss": 0.212, "step": 5270 }, { "epoch": 0.4207711343498044, "grad_norm": 0.3136822125622704, "learning_rate": 1.300496884474322e-05, "loss": 0.1504, "step": 5271 }, { "epoch": 0.42085096192224797, "grad_norm": 0.30634220923933775, "learning_rate": 1.3002502776904218e-05, "loss": 0.1908, "step": 5272 }, { "epoch": 0.4209307894946915, "grad_norm": 0.2877210418034296, "learning_rate": 1.3000036508359787e-05, "loss": 0.2089, "step": 5273 }, { "epoch": 0.421010617067135, "grad_norm": 0.306603032554967, "learning_rate": 1.2997570039274789e-05, "loss": 0.162, "step": 5274 }, { "epoch": 0.4210904446395785, "grad_norm": 0.33747281533919665, "learning_rate": 1.2995103369814102e-05, "loss": 0.1725, "step": 5275 }, { "epoch": 0.421170272212022, "grad_norm": 0.2678505381932459, "learning_rate": 1.2992636500142601e-05, "loss": 0.1534, "step": 5276 }, { "epoch": 0.42125009978446554, "grad_norm": 0.2893499890930782, "learning_rate": 1.2990169430425194e-05, "loss": 0.1739, "step": 5277 }, { "epoch": 0.42132992735690905, "grad_norm": 0.28981749747182134, "learning_rate": 1.2987702160826794e-05, "loss": 0.1405, "step": 5278 }, { "epoch": 0.4214097549293526, "grad_norm": 0.3317759512977176, "learning_rate": 1.298523469151233e-05, "loss": 0.1654, "step": 5279 }, { "epoch": 0.42148958250179613, "grad_norm": 0.3285228566058781, "learning_rate": 1.2982767022646736e-05, "loss": 0.1881, "step": 5280 }, { "epoch": 0.42156941007423965, "grad_norm": 0.2616263577123979, "learning_rate": 1.2980299154394978e-05, "loss": 0.1412, "step": 5281 }, { "epoch": 0.42164923764668316, "grad_norm": 0.30502436585084586, "learning_rate": 1.297783108692201e-05, "loss": 0.1473, "step": 5282 }, { "epoch": 0.4217290652191267, "grad_norm": 0.3311370837766346, "learning_rate": 1.2975362820392821e-05, "loss": 0.173, "step": 5283 }, { "epoch": 0.4218088927915702, "grad_norm": 0.3311831840332195, "learning_rate": 1.29728943549724e-05, "loss": 0.176, "step": 5284 }, { "epoch": 0.4218887203640137, "grad_norm": 0.32953797775394295, "learning_rate": 1.2970425690825756e-05, "loss": 0.2111, "step": 5285 }, { "epoch": 0.42196854793645727, "grad_norm": 0.3002611777925807, "learning_rate": 1.2967956828117914e-05, "loss": 0.1245, "step": 5286 }, { "epoch": 0.4220483755089008, "grad_norm": 0.2788545519053647, "learning_rate": 1.29654877670139e-05, "loss": 0.1589, "step": 5287 }, { "epoch": 0.4221282030813443, "grad_norm": 0.30670023357298554, "learning_rate": 1.2963018507678767e-05, "loss": 0.1729, "step": 5288 }, { "epoch": 0.4222080306537878, "grad_norm": 0.3715534154821633, "learning_rate": 1.2960549050277568e-05, "loss": 0.1623, "step": 5289 }, { "epoch": 0.4222878582262313, "grad_norm": 0.3304444973109751, "learning_rate": 1.295807939497539e-05, "loss": 0.1465, "step": 5290 }, { "epoch": 0.42236768579867484, "grad_norm": 0.3772126870046625, "learning_rate": 1.2955609541937302e-05, "loss": 0.1758, "step": 5291 }, { "epoch": 0.4224475133711184, "grad_norm": 0.3311828126981959, "learning_rate": 1.2953139491328417e-05, "loss": 0.1641, "step": 5292 }, { "epoch": 0.4225273409435619, "grad_norm": 0.2849531734876983, "learning_rate": 1.2950669243313843e-05, "loss": 0.1523, "step": 5293 }, { "epoch": 0.42260716851600544, "grad_norm": 0.3077938615830657, "learning_rate": 1.2948198798058704e-05, "loss": 0.1707, "step": 5294 }, { "epoch": 0.42268699608844895, "grad_norm": 0.3745247483192824, "learning_rate": 1.2945728155728146e-05, "loss": 0.2206, "step": 5295 }, { "epoch": 0.42276682366089247, "grad_norm": 0.335453499544586, "learning_rate": 1.2943257316487316e-05, "loss": 0.1942, "step": 5296 }, { "epoch": 0.422846651233336, "grad_norm": 0.2702546250279622, "learning_rate": 1.2940786280501388e-05, "loss": 0.1643, "step": 5297 }, { "epoch": 0.4229264788057795, "grad_norm": 0.29844221489356376, "learning_rate": 1.2938315047935528e-05, "loss": 0.1329, "step": 5298 }, { "epoch": 0.42300630637822306, "grad_norm": 0.26954940218865764, "learning_rate": 1.293584361895494e-05, "loss": 0.1832, "step": 5299 }, { "epoch": 0.4230861339506666, "grad_norm": 0.2636834652496719, "learning_rate": 1.2933371993724821e-05, "loss": 0.1474, "step": 5300 }, { "epoch": 0.4231659615231101, "grad_norm": 0.3448076158072087, "learning_rate": 1.2930900172410397e-05, "loss": 0.1744, "step": 5301 }, { "epoch": 0.4232457890955536, "grad_norm": 0.28226123673820763, "learning_rate": 1.2928428155176888e-05, "loss": 0.1647, "step": 5302 }, { "epoch": 0.4233256166679971, "grad_norm": 0.26941645472037445, "learning_rate": 1.2925955942189555e-05, "loss": 0.1626, "step": 5303 }, { "epoch": 0.42340544424044063, "grad_norm": 0.3215167094096506, "learning_rate": 1.292348353361364e-05, "loss": 0.1644, "step": 5304 }, { "epoch": 0.42348527181288415, "grad_norm": 0.33868717995577297, "learning_rate": 1.2921010929614422e-05, "loss": 0.1528, "step": 5305 }, { "epoch": 0.4235650993853277, "grad_norm": 0.2930727078998171, "learning_rate": 1.2918538130357184e-05, "loss": 0.1384, "step": 5306 }, { "epoch": 0.42364492695777123, "grad_norm": 0.32517997919115754, "learning_rate": 1.2916065136007219e-05, "loss": 0.1912, "step": 5307 }, { "epoch": 0.42372475453021474, "grad_norm": 0.3764097371837692, "learning_rate": 1.2913591946729845e-05, "loss": 0.1914, "step": 5308 }, { "epoch": 0.42380458210265826, "grad_norm": 0.2912402977231093, "learning_rate": 1.2911118562690375e-05, "loss": 0.1586, "step": 5309 }, { "epoch": 0.42388440967510177, "grad_norm": 0.25363252108852735, "learning_rate": 1.2908644984054151e-05, "loss": 0.1411, "step": 5310 }, { "epoch": 0.4239642372475453, "grad_norm": 0.2856626995880199, "learning_rate": 1.290617121098652e-05, "loss": 0.1576, "step": 5311 }, { "epoch": 0.4240440648199888, "grad_norm": 0.40670466841228203, "learning_rate": 1.2903697243652849e-05, "loss": 0.1833, "step": 5312 }, { "epoch": 0.42412389239243237, "grad_norm": 0.3377321133782575, "learning_rate": 1.2901223082218502e-05, "loss": 0.2684, "step": 5313 }, { "epoch": 0.4242037199648759, "grad_norm": 0.3202199685938702, "learning_rate": 1.2898748726848878e-05, "loss": 0.1607, "step": 5314 }, { "epoch": 0.4242835475373194, "grad_norm": 0.34884793107510903, "learning_rate": 1.2896274177709368e-05, "loss": 0.1655, "step": 5315 }, { "epoch": 0.4243633751097629, "grad_norm": 0.3754600307095586, "learning_rate": 1.2893799434965393e-05, "loss": 0.1791, "step": 5316 }, { "epoch": 0.4244432026822064, "grad_norm": 0.3100941951256149, "learning_rate": 1.289132449878238e-05, "loss": 0.1984, "step": 5317 }, { "epoch": 0.42452303025464994, "grad_norm": 0.31964917296440615, "learning_rate": 1.2888849369325764e-05, "loss": 0.1661, "step": 5318 }, { "epoch": 0.4246028578270935, "grad_norm": 0.2843665733073731, "learning_rate": 1.2886374046761002e-05, "loss": 0.1891, "step": 5319 }, { "epoch": 0.424682685399537, "grad_norm": 0.38590578761978284, "learning_rate": 1.2883898531253559e-05, "loss": 0.191, "step": 5320 }, { "epoch": 0.42476251297198053, "grad_norm": 0.36576972630287075, "learning_rate": 1.2881422822968909e-05, "loss": 0.159, "step": 5321 }, { "epoch": 0.42484234054442405, "grad_norm": 0.30764328388079765, "learning_rate": 1.2878946922072549e-05, "loss": 0.1373, "step": 5322 }, { "epoch": 0.42492216811686756, "grad_norm": 0.29542366741714554, "learning_rate": 1.2876470828729977e-05, "loss": 0.1876, "step": 5323 }, { "epoch": 0.4250019956893111, "grad_norm": 0.25626398833708675, "learning_rate": 1.2873994543106715e-05, "loss": 0.1711, "step": 5324 }, { "epoch": 0.4250818232617546, "grad_norm": 0.328296179542077, "learning_rate": 1.2871518065368292e-05, "loss": 0.1495, "step": 5325 }, { "epoch": 0.42516165083419816, "grad_norm": 0.32412087650390037, "learning_rate": 1.2869041395680249e-05, "loss": 0.2168, "step": 5326 }, { "epoch": 0.42524147840664167, "grad_norm": 0.2662241310055567, "learning_rate": 1.2866564534208143e-05, "loss": 0.2249, "step": 5327 }, { "epoch": 0.4253213059790852, "grad_norm": 0.304204504899652, "learning_rate": 1.2864087481117544e-05, "loss": 0.196, "step": 5328 }, { "epoch": 0.4254011335515287, "grad_norm": 0.3116728740383383, "learning_rate": 1.2861610236574028e-05, "loss": 0.1986, "step": 5329 }, { "epoch": 0.4254809611239722, "grad_norm": 0.2966631201418549, "learning_rate": 1.2859132800743198e-05, "loss": 0.1825, "step": 5330 }, { "epoch": 0.4255607886964157, "grad_norm": 0.32431953760585436, "learning_rate": 1.285665517379065e-05, "loss": 0.163, "step": 5331 }, { "epoch": 0.42564061626885924, "grad_norm": 0.2702382479268457, "learning_rate": 1.285417735588201e-05, "loss": 0.1712, "step": 5332 }, { "epoch": 0.4257204438413028, "grad_norm": 0.3088968755541565, "learning_rate": 1.2851699347182906e-05, "loss": 0.1934, "step": 5333 }, { "epoch": 0.4258002714137463, "grad_norm": 0.27649821313588246, "learning_rate": 1.2849221147858993e-05, "loss": 0.1643, "step": 5334 }, { "epoch": 0.42588009898618984, "grad_norm": 0.2961177776037913, "learning_rate": 1.2846742758075914e-05, "loss": 0.1474, "step": 5335 }, { "epoch": 0.42595992655863335, "grad_norm": 0.3071116840347325, "learning_rate": 1.2844264177999352e-05, "loss": 0.1658, "step": 5336 }, { "epoch": 0.42603975413107686, "grad_norm": 0.2897091792065124, "learning_rate": 1.2841785407794985e-05, "loss": 0.2141, "step": 5337 }, { "epoch": 0.4261195817035204, "grad_norm": 0.28667233257728786, "learning_rate": 1.283930644762851e-05, "loss": 0.1814, "step": 5338 }, { "epoch": 0.4261994092759639, "grad_norm": 0.28106910724681783, "learning_rate": 1.2836827297665632e-05, "loss": 0.1499, "step": 5339 }, { "epoch": 0.42627923684840746, "grad_norm": 0.3137274257123562, "learning_rate": 1.2834347958072077e-05, "loss": 0.1672, "step": 5340 }, { "epoch": 0.426359064420851, "grad_norm": 0.2963281986481417, "learning_rate": 1.2831868429013581e-05, "loss": 0.1948, "step": 5341 }, { "epoch": 0.4264388919932945, "grad_norm": 0.26159289695997195, "learning_rate": 1.2829388710655886e-05, "loss": 0.1621, "step": 5342 }, { "epoch": 0.426518719565738, "grad_norm": 0.2447698059471036, "learning_rate": 1.2826908803164753e-05, "loss": 0.236, "step": 5343 }, { "epoch": 0.4265985471381815, "grad_norm": 0.29111910741261177, "learning_rate": 1.2824428706705951e-05, "loss": 0.1761, "step": 5344 }, { "epoch": 0.42667837471062503, "grad_norm": 0.2813103219889586, "learning_rate": 1.2821948421445271e-05, "loss": 0.1731, "step": 5345 }, { "epoch": 0.4267582022830686, "grad_norm": 0.32015332988650264, "learning_rate": 1.2819467947548505e-05, "loss": 0.1987, "step": 5346 }, { "epoch": 0.4268380298555121, "grad_norm": 0.2706291532739905, "learning_rate": 1.2816987285181464e-05, "loss": 0.2045, "step": 5347 }, { "epoch": 0.4269178574279556, "grad_norm": 0.3277420810571824, "learning_rate": 1.281450643450997e-05, "loss": 0.1819, "step": 5348 }, { "epoch": 0.42699768500039914, "grad_norm": 0.3000778609864322, "learning_rate": 1.2812025395699863e-05, "loss": 0.1773, "step": 5349 }, { "epoch": 0.42707751257284265, "grad_norm": 0.2976062116311155, "learning_rate": 1.2809544168916986e-05, "loss": 0.1785, "step": 5350 }, { "epoch": 0.42715734014528617, "grad_norm": 0.23765131517447824, "learning_rate": 1.2807062754327196e-05, "loss": 0.1716, "step": 5351 }, { "epoch": 0.4272371677177297, "grad_norm": 0.3665725461692161, "learning_rate": 1.280458115209637e-05, "loss": 0.2382, "step": 5352 }, { "epoch": 0.42731699529017325, "grad_norm": 0.27459578123269557, "learning_rate": 1.2802099362390395e-05, "loss": 0.1848, "step": 5353 }, { "epoch": 0.42739682286261677, "grad_norm": 0.28813420693562464, "learning_rate": 1.2799617385375166e-05, "loss": 0.1646, "step": 5354 }, { "epoch": 0.4274766504350603, "grad_norm": 0.29749622964240224, "learning_rate": 1.2797135221216595e-05, "loss": 0.1904, "step": 5355 }, { "epoch": 0.4275564780075038, "grad_norm": 0.29911905868729294, "learning_rate": 1.2794652870080604e-05, "loss": 0.1568, "step": 5356 }, { "epoch": 0.4276363055799473, "grad_norm": 0.2889363944393387, "learning_rate": 1.2792170332133129e-05, "loss": 0.196, "step": 5357 }, { "epoch": 0.4277161331523908, "grad_norm": 0.28361715739887644, "learning_rate": 1.2789687607540116e-05, "loss": 0.1749, "step": 5358 }, { "epoch": 0.42779596072483433, "grad_norm": 0.2909733156429373, "learning_rate": 1.2787204696467526e-05, "loss": 0.1836, "step": 5359 }, { "epoch": 0.4278757882972779, "grad_norm": 0.2975053657354819, "learning_rate": 1.2784721599081334e-05, "loss": 0.1558, "step": 5360 }, { "epoch": 0.4279556158697214, "grad_norm": 0.3160375566031024, "learning_rate": 1.278223831554752e-05, "loss": 0.1695, "step": 5361 }, { "epoch": 0.42803544344216493, "grad_norm": 0.3253744904209685, "learning_rate": 1.2779754846032085e-05, "loss": 0.1458, "step": 5362 }, { "epoch": 0.42811527101460845, "grad_norm": 0.30927610982414516, "learning_rate": 1.2777271190701041e-05, "loss": 0.1466, "step": 5363 }, { "epoch": 0.42819509858705196, "grad_norm": 0.3842614698614869, "learning_rate": 1.277478734972041e-05, "loss": 0.1987, "step": 5364 }, { "epoch": 0.4282749261594955, "grad_norm": 0.313435662478997, "learning_rate": 1.2772303323256226e-05, "loss": 0.1969, "step": 5365 }, { "epoch": 0.428354753731939, "grad_norm": 0.3108108856716733, "learning_rate": 1.2769819111474533e-05, "loss": 0.147, "step": 5366 }, { "epoch": 0.42843458130438256, "grad_norm": 0.3324877585035123, "learning_rate": 1.2767334714541396e-05, "loss": 0.1628, "step": 5367 }, { "epoch": 0.42851440887682607, "grad_norm": 0.28717789217421524, "learning_rate": 1.2764850132622881e-05, "loss": 0.1472, "step": 5368 }, { "epoch": 0.4285942364492696, "grad_norm": 0.27441794624645444, "learning_rate": 1.2762365365885081e-05, "loss": 0.2093, "step": 5369 }, { "epoch": 0.4286740640217131, "grad_norm": 0.2999443527940127, "learning_rate": 1.2759880414494085e-05, "loss": 0.1333, "step": 5370 }, { "epoch": 0.4287538915941566, "grad_norm": 0.33236933060578805, "learning_rate": 1.275739527861601e-05, "loss": 0.2501, "step": 5371 }, { "epoch": 0.4288337191666001, "grad_norm": 0.3097395491572676, "learning_rate": 1.2754909958416968e-05, "loss": 0.1481, "step": 5372 }, { "epoch": 0.42891354673904364, "grad_norm": 0.2841052043713531, "learning_rate": 1.27524244540631e-05, "loss": 0.1539, "step": 5373 }, { "epoch": 0.4289933743114872, "grad_norm": 0.3306451377088392, "learning_rate": 1.274993876572055e-05, "loss": 0.1321, "step": 5374 }, { "epoch": 0.4290732018839307, "grad_norm": 0.3129297107907665, "learning_rate": 1.2747452893555473e-05, "loss": 0.1844, "step": 5375 }, { "epoch": 0.42915302945637424, "grad_norm": 0.3190244571441304, "learning_rate": 1.2744966837734045e-05, "loss": 0.1473, "step": 5376 }, { "epoch": 0.42923285702881775, "grad_norm": 0.30679984641320707, "learning_rate": 1.2742480598422447e-05, "loss": 0.172, "step": 5377 }, { "epoch": 0.42931268460126126, "grad_norm": 0.3505855091957668, "learning_rate": 1.2739994175786874e-05, "loss": 0.1579, "step": 5378 }, { "epoch": 0.4293925121737048, "grad_norm": 0.2797100744552906, "learning_rate": 1.2737507569993534e-05, "loss": 0.1768, "step": 5379 }, { "epoch": 0.42947233974614835, "grad_norm": 0.3045905350392989, "learning_rate": 1.2735020781208647e-05, "loss": 0.1937, "step": 5380 }, { "epoch": 0.42955216731859186, "grad_norm": 0.2998149961062444, "learning_rate": 1.2732533809598442e-05, "loss": 0.1436, "step": 5381 }, { "epoch": 0.4296319948910354, "grad_norm": 0.33321141918965996, "learning_rate": 1.2730046655329168e-05, "loss": 0.2141, "step": 5382 }, { "epoch": 0.4297118224634789, "grad_norm": 0.3683225847560747, "learning_rate": 1.2727559318567078e-05, "loss": 0.2195, "step": 5383 }, { "epoch": 0.4297916500359224, "grad_norm": 0.27765419625497045, "learning_rate": 1.2725071799478443e-05, "loss": 0.2028, "step": 5384 }, { "epoch": 0.4298714776083659, "grad_norm": 0.2982628022574997, "learning_rate": 1.272258409822954e-05, "loss": 0.2062, "step": 5385 }, { "epoch": 0.42995130518080943, "grad_norm": 0.3089143780804942, "learning_rate": 1.2720096214986663e-05, "loss": 0.1537, "step": 5386 }, { "epoch": 0.430031132753253, "grad_norm": 0.32819173442183003, "learning_rate": 1.2717608149916126e-05, "loss": 0.2149, "step": 5387 }, { "epoch": 0.4301109603256965, "grad_norm": 0.29425509424526514, "learning_rate": 1.2715119903184232e-05, "loss": 0.182, "step": 5388 }, { "epoch": 0.43019078789814, "grad_norm": 0.27257589119560266, "learning_rate": 1.271263147495732e-05, "loss": 0.1946, "step": 5389 }, { "epoch": 0.43027061547058354, "grad_norm": 0.2919075483212954, "learning_rate": 1.271014286540173e-05, "loss": 0.1827, "step": 5390 }, { "epoch": 0.43035044304302705, "grad_norm": 0.2668049755968915, "learning_rate": 1.2707654074683813e-05, "loss": 0.2035, "step": 5391 }, { "epoch": 0.43043027061547057, "grad_norm": 0.2589577179122212, "learning_rate": 1.2705165102969934e-05, "loss": 0.1965, "step": 5392 }, { "epoch": 0.4305100981879141, "grad_norm": 0.2934107883932184, "learning_rate": 1.2702675950426477e-05, "loss": 0.1662, "step": 5393 }, { "epoch": 0.43058992576035765, "grad_norm": 0.34596683286597035, "learning_rate": 1.270018661721983e-05, "loss": 0.1896, "step": 5394 }, { "epoch": 0.43066975333280116, "grad_norm": 0.29343517969886684, "learning_rate": 1.2697697103516388e-05, "loss": 0.1553, "step": 5395 }, { "epoch": 0.4307495809052447, "grad_norm": 0.25173817949732447, "learning_rate": 1.2695207409482575e-05, "loss": 0.2337, "step": 5396 }, { "epoch": 0.4308294084776882, "grad_norm": 0.3345651770824702, "learning_rate": 1.2692717535284808e-05, "loss": 0.1702, "step": 5397 }, { "epoch": 0.4309092360501317, "grad_norm": 0.3279416823271428, "learning_rate": 1.2690227481089535e-05, "loss": 0.1607, "step": 5398 }, { "epoch": 0.4309890636225752, "grad_norm": 0.3110817754990198, "learning_rate": 1.2687737247063199e-05, "loss": 0.17, "step": 5399 }, { "epoch": 0.43106889119501873, "grad_norm": 0.28538413216157366, "learning_rate": 1.2685246833372265e-05, "loss": 0.2045, "step": 5400 }, { "epoch": 0.4311487187674623, "grad_norm": 0.251992509963571, "learning_rate": 1.2682756240183205e-05, "loss": 0.1547, "step": 5401 }, { "epoch": 0.4312285463399058, "grad_norm": 0.28867674448201125, "learning_rate": 1.2680265467662512e-05, "loss": 0.1684, "step": 5402 }, { "epoch": 0.43130837391234933, "grad_norm": 0.33511542865742333, "learning_rate": 1.2677774515976676e-05, "loss": 0.195, "step": 5403 }, { "epoch": 0.43138820148479284, "grad_norm": 0.27653291817359904, "learning_rate": 1.2675283385292212e-05, "loss": 0.137, "step": 5404 }, { "epoch": 0.43146802905723636, "grad_norm": 0.29050242042573027, "learning_rate": 1.2672792075775637e-05, "loss": 0.1566, "step": 5405 }, { "epoch": 0.43154785662967987, "grad_norm": 0.27102633945125404, "learning_rate": 1.2670300587593494e-05, "loss": 0.1817, "step": 5406 }, { "epoch": 0.43162768420212344, "grad_norm": 0.30570178552297606, "learning_rate": 1.2667808920912319e-05, "loss": 0.1335, "step": 5407 }, { "epoch": 0.43170751177456695, "grad_norm": 0.2832822180540813, "learning_rate": 1.266531707589868e-05, "loss": 0.1472, "step": 5408 }, { "epoch": 0.43178733934701047, "grad_norm": 0.2886690573280027, "learning_rate": 1.2662825052719141e-05, "loss": 0.1841, "step": 5409 }, { "epoch": 0.431867166919454, "grad_norm": 0.3296444964967236, "learning_rate": 1.2660332851540285e-05, "loss": 0.165, "step": 5410 }, { "epoch": 0.4319469944918975, "grad_norm": 0.3038624590053991, "learning_rate": 1.2657840472528706e-05, "loss": 0.1588, "step": 5411 }, { "epoch": 0.432026822064341, "grad_norm": 0.3508117530093556, "learning_rate": 1.2655347915851009e-05, "loss": 0.1987, "step": 5412 }, { "epoch": 0.4321066496367845, "grad_norm": 0.35114089081218325, "learning_rate": 1.2652855181673813e-05, "loss": 0.166, "step": 5413 }, { "epoch": 0.4321864772092281, "grad_norm": 0.2559036690881374, "learning_rate": 1.2650362270163745e-05, "loss": 0.2229, "step": 5414 }, { "epoch": 0.4322663047816716, "grad_norm": 0.2909765673690706, "learning_rate": 1.2647869181487451e-05, "loss": 0.2093, "step": 5415 }, { "epoch": 0.4323461323541151, "grad_norm": 0.2736112128462744, "learning_rate": 1.2645375915811578e-05, "loss": 0.1744, "step": 5416 }, { "epoch": 0.43242595992655863, "grad_norm": 0.26820481069436947, "learning_rate": 1.2642882473302795e-05, "loss": 0.1895, "step": 5417 }, { "epoch": 0.43250578749900215, "grad_norm": 0.2868936465080684, "learning_rate": 1.2640388854127777e-05, "loss": 0.1606, "step": 5418 }, { "epoch": 0.43258561507144566, "grad_norm": 0.3226728177674796, "learning_rate": 1.2637895058453213e-05, "loss": 0.2012, "step": 5419 }, { "epoch": 0.4326654426438892, "grad_norm": 0.31707090013998196, "learning_rate": 1.2635401086445805e-05, "loss": 0.2082, "step": 5420 }, { "epoch": 0.43274527021633274, "grad_norm": 0.29049998727353843, "learning_rate": 1.2632906938272262e-05, "loss": 0.1922, "step": 5421 }, { "epoch": 0.43282509778877626, "grad_norm": 0.3276881552854173, "learning_rate": 1.2630412614099312e-05, "loss": 0.1551, "step": 5422 }, { "epoch": 0.4329049253612198, "grad_norm": 0.33411284302433314, "learning_rate": 1.2627918114093687e-05, "loss": 0.2193, "step": 5423 }, { "epoch": 0.4329847529336633, "grad_norm": 0.29565612470105074, "learning_rate": 1.2625423438422138e-05, "loss": 0.1711, "step": 5424 }, { "epoch": 0.4330645805061068, "grad_norm": 0.31795754179776603, "learning_rate": 1.262292858725142e-05, "loss": 0.1512, "step": 5425 }, { "epoch": 0.4331444080785503, "grad_norm": 0.2709158590012488, "learning_rate": 1.2620433560748307e-05, "loss": 0.1498, "step": 5426 }, { "epoch": 0.4332242356509938, "grad_norm": 0.3148593383248773, "learning_rate": 1.2617938359079578e-05, "loss": 0.1802, "step": 5427 }, { "epoch": 0.4333040632234374, "grad_norm": 0.30691182645094384, "learning_rate": 1.2615442982412032e-05, "loss": 0.1853, "step": 5428 }, { "epoch": 0.4333838907958809, "grad_norm": 0.2984256912405124, "learning_rate": 1.2612947430912472e-05, "loss": 0.1843, "step": 5429 }, { "epoch": 0.4334637183683244, "grad_norm": 0.28767665142427395, "learning_rate": 1.2610451704747717e-05, "loss": 0.1592, "step": 5430 }, { "epoch": 0.43354354594076794, "grad_norm": 0.2472890201858611, "learning_rate": 1.26079558040846e-05, "loss": 0.157, "step": 5431 }, { "epoch": 0.43362337351321145, "grad_norm": 0.29514608551823734, "learning_rate": 1.2605459729089953e-05, "loss": 0.1694, "step": 5432 }, { "epoch": 0.43370320108565497, "grad_norm": 0.33017016766461893, "learning_rate": 1.2602963479930636e-05, "loss": 0.1624, "step": 5433 }, { "epoch": 0.4337830286580985, "grad_norm": 0.2648097563509407, "learning_rate": 1.2600467056773509e-05, "loss": 0.2396, "step": 5434 }, { "epoch": 0.43386285623054205, "grad_norm": 0.32982220459229245, "learning_rate": 1.2597970459785454e-05, "loss": 0.1305, "step": 5435 }, { "epoch": 0.43394268380298556, "grad_norm": 0.3096390970416305, "learning_rate": 1.2595473689133352e-05, "loss": 0.1645, "step": 5436 }, { "epoch": 0.4340225113754291, "grad_norm": 0.3124425612049255, "learning_rate": 1.2592976744984109e-05, "loss": 0.1801, "step": 5437 }, { "epoch": 0.4341023389478726, "grad_norm": 0.33551663703719237, "learning_rate": 1.2590479627504628e-05, "loss": 0.1708, "step": 5438 }, { "epoch": 0.4341821665203161, "grad_norm": 0.30111212859680675, "learning_rate": 1.258798233686184e-05, "loss": 0.1654, "step": 5439 }, { "epoch": 0.4342619940927596, "grad_norm": 0.2826699902507544, "learning_rate": 1.2585484873222674e-05, "loss": 0.1884, "step": 5440 }, { "epoch": 0.4343418216652032, "grad_norm": 0.2866370733535927, "learning_rate": 1.2582987236754071e-05, "loss": 0.1521, "step": 5441 }, { "epoch": 0.4344216492376467, "grad_norm": 0.282822618369002, "learning_rate": 1.2580489427622999e-05, "loss": 0.1809, "step": 5442 }, { "epoch": 0.4345014768100902, "grad_norm": 0.32435417946144246, "learning_rate": 1.2577991445996416e-05, "loss": 0.2302, "step": 5443 }, { "epoch": 0.43458130438253373, "grad_norm": 0.2702581227673874, "learning_rate": 1.2575493292041311e-05, "loss": 0.1765, "step": 5444 }, { "epoch": 0.43466113195497724, "grad_norm": 0.2854288292925683, "learning_rate": 1.257299496592467e-05, "loss": 0.166, "step": 5445 }, { "epoch": 0.43474095952742076, "grad_norm": 0.29604829583023706, "learning_rate": 1.25704964678135e-05, "loss": 0.2193, "step": 5446 }, { "epoch": 0.43482078709986427, "grad_norm": 0.31389170960408747, "learning_rate": 1.2567997797874812e-05, "loss": 0.199, "step": 5447 }, { "epoch": 0.43490061467230784, "grad_norm": 0.27142463396986977, "learning_rate": 1.2565498956275633e-05, "loss": 0.1497, "step": 5448 }, { "epoch": 0.43498044224475135, "grad_norm": 0.3147484260128504, "learning_rate": 1.2562999943183e-05, "loss": 0.1476, "step": 5449 }, { "epoch": 0.43506026981719487, "grad_norm": 0.2700860845754666, "learning_rate": 1.2560500758763967e-05, "loss": 0.131, "step": 5450 }, { "epoch": 0.4351400973896384, "grad_norm": 0.30748825796175727, "learning_rate": 1.2558001403185589e-05, "loss": 0.1633, "step": 5451 }, { "epoch": 0.4352199249620819, "grad_norm": 0.3115999147682147, "learning_rate": 1.2555501876614944e-05, "loss": 0.1376, "step": 5452 }, { "epoch": 0.4352997525345254, "grad_norm": 0.3020940483909605, "learning_rate": 1.2553002179219107e-05, "loss": 0.2238, "step": 5453 }, { "epoch": 0.4353795801069689, "grad_norm": 0.3081920000469378, "learning_rate": 1.255050231116518e-05, "loss": 0.1883, "step": 5454 }, { "epoch": 0.4354594076794125, "grad_norm": 0.27790502140067097, "learning_rate": 1.2548002272620268e-05, "loss": 0.1719, "step": 5455 }, { "epoch": 0.435539235251856, "grad_norm": 0.33162308825753434, "learning_rate": 1.2545502063751485e-05, "loss": 0.1464, "step": 5456 }, { "epoch": 0.4356190628242995, "grad_norm": 0.29554863489142175, "learning_rate": 1.2543001684725965e-05, "loss": 0.1473, "step": 5457 }, { "epoch": 0.43569889039674303, "grad_norm": 0.3410139484861662, "learning_rate": 1.2540501135710844e-05, "loss": 0.1523, "step": 5458 }, { "epoch": 0.43577871796918655, "grad_norm": 0.2759021878209667, "learning_rate": 1.2538000416873276e-05, "loss": 0.1393, "step": 5459 }, { "epoch": 0.43585854554163006, "grad_norm": 0.33701926592307024, "learning_rate": 1.2535499528380426e-05, "loss": 0.1698, "step": 5460 }, { "epoch": 0.4359383731140736, "grad_norm": 0.2710554323807659, "learning_rate": 1.2532998470399464e-05, "loss": 0.1585, "step": 5461 }, { "epoch": 0.43601820068651714, "grad_norm": 0.3916226477777021, "learning_rate": 1.2530497243097583e-05, "loss": 0.1975, "step": 5462 }, { "epoch": 0.43609802825896066, "grad_norm": 0.3102226958351133, "learning_rate": 1.2527995846641971e-05, "loss": 0.1829, "step": 5463 }, { "epoch": 0.43617785583140417, "grad_norm": 0.3274602561171986, "learning_rate": 1.2525494281199844e-05, "loss": 0.2084, "step": 5464 }, { "epoch": 0.4362576834038477, "grad_norm": 0.2893833734641574, "learning_rate": 1.2522992546938415e-05, "loss": 0.1828, "step": 5465 }, { "epoch": 0.4363375109762912, "grad_norm": 0.30402887596462413, "learning_rate": 1.2520490644024922e-05, "loss": 0.1664, "step": 5466 }, { "epoch": 0.4364173385487347, "grad_norm": 0.24404742486824813, "learning_rate": 1.2517988572626602e-05, "loss": 0.2257, "step": 5467 }, { "epoch": 0.4364971661211783, "grad_norm": 0.2889993962759433, "learning_rate": 1.2515486332910714e-05, "loss": 0.1822, "step": 5468 }, { "epoch": 0.4365769936936218, "grad_norm": 0.2862034070407841, "learning_rate": 1.2512983925044518e-05, "loss": 0.1544, "step": 5469 }, { "epoch": 0.4366568212660653, "grad_norm": 0.2995193354370297, "learning_rate": 1.2510481349195292e-05, "loss": 0.1714, "step": 5470 }, { "epoch": 0.4367366488385088, "grad_norm": 0.38495149311282245, "learning_rate": 1.250797860553032e-05, "loss": 0.154, "step": 5471 }, { "epoch": 0.43681647641095234, "grad_norm": 0.3441323042222529, "learning_rate": 1.2505475694216905e-05, "loss": 0.1908, "step": 5472 }, { "epoch": 0.43689630398339585, "grad_norm": 0.3061138078107526, "learning_rate": 1.2502972615422352e-05, "loss": 0.1765, "step": 5473 }, { "epoch": 0.43697613155583936, "grad_norm": 0.29933744718616373, "learning_rate": 1.250046936931399e-05, "loss": 0.2027, "step": 5474 }, { "epoch": 0.43705595912828293, "grad_norm": 0.2737859185324608, "learning_rate": 1.2497965956059144e-05, "loss": 0.1953, "step": 5475 }, { "epoch": 0.43713578670072645, "grad_norm": 0.332894808622963, "learning_rate": 1.249546237582516e-05, "loss": 0.1911, "step": 5476 }, { "epoch": 0.43721561427316996, "grad_norm": 0.2939474383016848, "learning_rate": 1.2492958628779393e-05, "loss": 0.2002, "step": 5477 }, { "epoch": 0.4372954418456135, "grad_norm": 0.31779802173581273, "learning_rate": 1.2490454715089205e-05, "loss": 0.1966, "step": 5478 }, { "epoch": 0.437375269418057, "grad_norm": 0.321883354140122, "learning_rate": 1.2487950634921975e-05, "loss": 0.1827, "step": 5479 }, { "epoch": 0.4374550969905005, "grad_norm": 0.3166362815724828, "learning_rate": 1.2485446388445091e-05, "loss": 0.1744, "step": 5480 }, { "epoch": 0.437534924562944, "grad_norm": 0.309192840142328, "learning_rate": 1.2482941975825953e-05, "loss": 0.1703, "step": 5481 }, { "epoch": 0.4376147521353876, "grad_norm": 0.3417941331568857, "learning_rate": 1.2480437397231968e-05, "loss": 0.1565, "step": 5482 }, { "epoch": 0.4376945797078311, "grad_norm": 0.3347648773864014, "learning_rate": 1.2477932652830561e-05, "loss": 0.2221, "step": 5483 }, { "epoch": 0.4377744072802746, "grad_norm": 0.27099165213861715, "learning_rate": 1.2475427742789163e-05, "loss": 0.1806, "step": 5484 }, { "epoch": 0.4378542348527181, "grad_norm": 0.3384951440841502, "learning_rate": 1.2472922667275216e-05, "loss": 0.157, "step": 5485 }, { "epoch": 0.43793406242516164, "grad_norm": 0.3159170231378893, "learning_rate": 1.2470417426456176e-05, "loss": 0.1291, "step": 5486 }, { "epoch": 0.43801388999760515, "grad_norm": 0.3190229808155445, "learning_rate": 1.2467912020499504e-05, "loss": 0.2017, "step": 5487 }, { "epoch": 0.43809371757004867, "grad_norm": 0.34901960019055117, "learning_rate": 1.2465406449572683e-05, "loss": 0.1717, "step": 5488 }, { "epoch": 0.43817354514249224, "grad_norm": 0.3024888152391532, "learning_rate": 1.2462900713843195e-05, "loss": 0.1704, "step": 5489 }, { "epoch": 0.43825337271493575, "grad_norm": 0.3136661249105996, "learning_rate": 1.2460394813478544e-05, "loss": 0.2082, "step": 5490 }, { "epoch": 0.43833320028737927, "grad_norm": 0.26975418091361364, "learning_rate": 1.2457888748646239e-05, "loss": 0.1877, "step": 5491 }, { "epoch": 0.4384130278598228, "grad_norm": 0.28312530534198116, "learning_rate": 1.2455382519513794e-05, "loss": 0.1684, "step": 5492 }, { "epoch": 0.4384928554322663, "grad_norm": 0.28818086297296047, "learning_rate": 1.2452876126248743e-05, "loss": 0.1782, "step": 5493 }, { "epoch": 0.4385726830047098, "grad_norm": 0.24272797419406827, "learning_rate": 1.2450369569018635e-05, "loss": 0.1455, "step": 5494 }, { "epoch": 0.4386525105771534, "grad_norm": 0.3000027264325837, "learning_rate": 1.2447862847991012e-05, "loss": 0.1573, "step": 5495 }, { "epoch": 0.4387323381495969, "grad_norm": 0.29449116514803164, "learning_rate": 1.244535596333345e-05, "loss": 0.1998, "step": 5496 }, { "epoch": 0.4388121657220404, "grad_norm": 0.3038144720030599, "learning_rate": 1.2442848915213516e-05, "loss": 0.1413, "step": 5497 }, { "epoch": 0.4388919932944839, "grad_norm": 0.27385913021461034, "learning_rate": 1.2440341703798802e-05, "loss": 0.1637, "step": 5498 }, { "epoch": 0.43897182086692743, "grad_norm": 0.2719048147904193, "learning_rate": 1.2437834329256902e-05, "loss": 0.1305, "step": 5499 }, { "epoch": 0.43905164843937095, "grad_norm": 0.36697417897134404, "learning_rate": 1.2435326791755425e-05, "loss": 0.1921, "step": 5500 }, { "epoch": 0.43913147601181446, "grad_norm": 0.2669024781046697, "learning_rate": 1.2432819091461987e-05, "loss": 0.1648, "step": 5501 }, { "epoch": 0.43921130358425803, "grad_norm": 0.30447866926456196, "learning_rate": 1.2430311228544221e-05, "loss": 0.2129, "step": 5502 }, { "epoch": 0.43929113115670154, "grad_norm": 0.30573102292176807, "learning_rate": 1.242780320316977e-05, "loss": 0.1861, "step": 5503 }, { "epoch": 0.43937095872914506, "grad_norm": 0.3000525676244329, "learning_rate": 1.242529501550628e-05, "loss": 0.1636, "step": 5504 }, { "epoch": 0.43945078630158857, "grad_norm": 0.29310629915131986, "learning_rate": 1.2422786665721417e-05, "loss": 0.143, "step": 5505 }, { "epoch": 0.4395306138740321, "grad_norm": 0.26414099208878344, "learning_rate": 1.2420278153982855e-05, "loss": 0.1789, "step": 5506 }, { "epoch": 0.4396104414464756, "grad_norm": 0.32824098769080606, "learning_rate": 1.2417769480458275e-05, "loss": 0.1816, "step": 5507 }, { "epoch": 0.4396902690189191, "grad_norm": 0.30555244753150934, "learning_rate": 1.2415260645315373e-05, "loss": 0.1871, "step": 5508 }, { "epoch": 0.4397700965913627, "grad_norm": 0.3746927587549104, "learning_rate": 1.2412751648721855e-05, "loss": 0.1823, "step": 5509 }, { "epoch": 0.4398499241638062, "grad_norm": 0.29203787201161635, "learning_rate": 1.2410242490845439e-05, "loss": 0.1717, "step": 5510 }, { "epoch": 0.4399297517362497, "grad_norm": 0.2635548746385933, "learning_rate": 1.2407733171853849e-05, "loss": 0.1936, "step": 5511 }, { "epoch": 0.4400095793086932, "grad_norm": 0.28618441013408213, "learning_rate": 1.2405223691914825e-05, "loss": 0.1703, "step": 5512 }, { "epoch": 0.44008940688113674, "grad_norm": 0.3235259285448347, "learning_rate": 1.2402714051196117e-05, "loss": 0.1792, "step": 5513 }, { "epoch": 0.44016923445358025, "grad_norm": 0.3151653228106383, "learning_rate": 1.2400204249865485e-05, "loss": 0.1752, "step": 5514 }, { "epoch": 0.44024906202602376, "grad_norm": 0.260148314077744, "learning_rate": 1.2397694288090693e-05, "loss": 0.1542, "step": 5515 }, { "epoch": 0.44032888959846733, "grad_norm": 0.33759760914914655, "learning_rate": 1.239518416603953e-05, "loss": 0.1874, "step": 5516 }, { "epoch": 0.44040871717091085, "grad_norm": 0.325791752074347, "learning_rate": 1.239267388387978e-05, "loss": 0.1877, "step": 5517 }, { "epoch": 0.44048854474335436, "grad_norm": 0.27370748702099384, "learning_rate": 1.2390163441779255e-05, "loss": 0.2301, "step": 5518 }, { "epoch": 0.4405683723157979, "grad_norm": 0.2950180114023816, "learning_rate": 1.238765283990576e-05, "loss": 0.1792, "step": 5519 }, { "epoch": 0.4406481998882414, "grad_norm": 0.3181288744993802, "learning_rate": 1.2385142078427122e-05, "loss": 0.2244, "step": 5520 }, { "epoch": 0.4407280274606849, "grad_norm": 0.3152546907386803, "learning_rate": 1.2382631157511176e-05, "loss": 0.1756, "step": 5521 }, { "epoch": 0.4408078550331284, "grad_norm": 0.2898667763815036, "learning_rate": 1.2380120077325763e-05, "loss": 0.2088, "step": 5522 }, { "epoch": 0.440887682605572, "grad_norm": 0.30677020090458373, "learning_rate": 1.2377608838038747e-05, "loss": 0.1781, "step": 5523 }, { "epoch": 0.4409675101780155, "grad_norm": 0.35982218650306336, "learning_rate": 1.2375097439817983e-05, "loss": 0.1915, "step": 5524 }, { "epoch": 0.441047337750459, "grad_norm": 0.28398414066832806, "learning_rate": 1.2372585882831358e-05, "loss": 0.1703, "step": 5525 }, { "epoch": 0.4411271653229025, "grad_norm": 0.29582324857955383, "learning_rate": 1.2370074167246758e-05, "loss": 0.1378, "step": 5526 }, { "epoch": 0.44120699289534604, "grad_norm": 0.26362291462406673, "learning_rate": 1.2367562293232075e-05, "loss": 0.1516, "step": 5527 }, { "epoch": 0.44128682046778955, "grad_norm": 0.26786440610589185, "learning_rate": 1.2365050260955226e-05, "loss": 0.1387, "step": 5528 }, { "epoch": 0.4413666480402331, "grad_norm": 0.3193286209246344, "learning_rate": 1.2362538070584124e-05, "loss": 0.1763, "step": 5529 }, { "epoch": 0.44144647561267664, "grad_norm": 0.3152056995438779, "learning_rate": 1.2360025722286704e-05, "loss": 0.1726, "step": 5530 }, { "epoch": 0.44152630318512015, "grad_norm": 0.3085591590155923, "learning_rate": 1.2357513216230902e-05, "loss": 0.1577, "step": 5531 }, { "epoch": 0.44160613075756366, "grad_norm": 0.3222792095746643, "learning_rate": 1.2355000552584674e-05, "loss": 0.1444, "step": 5532 }, { "epoch": 0.4416859583300072, "grad_norm": 0.30838046312734313, "learning_rate": 1.2352487731515975e-05, "loss": 0.1501, "step": 5533 }, { "epoch": 0.4417657859024507, "grad_norm": 0.29300321673200685, "learning_rate": 1.2349974753192782e-05, "loss": 0.1631, "step": 5534 }, { "epoch": 0.4418456134748942, "grad_norm": 0.3045352313442693, "learning_rate": 1.2347461617783077e-05, "loss": 0.1991, "step": 5535 }, { "epoch": 0.4419254410473378, "grad_norm": 0.3113361319777692, "learning_rate": 1.2344948325454855e-05, "loss": 0.1664, "step": 5536 }, { "epoch": 0.4420052686197813, "grad_norm": 0.2950210860438739, "learning_rate": 1.2342434876376115e-05, "loss": 0.181, "step": 5537 }, { "epoch": 0.4420850961922248, "grad_norm": 0.29521306527266455, "learning_rate": 1.2339921270714873e-05, "loss": 0.1741, "step": 5538 }, { "epoch": 0.4421649237646683, "grad_norm": 0.2597723924868234, "learning_rate": 1.2337407508639154e-05, "loss": 0.174, "step": 5539 }, { "epoch": 0.44224475133711183, "grad_norm": 0.3022067217413586, "learning_rate": 1.2334893590316997e-05, "loss": 0.2272, "step": 5540 }, { "epoch": 0.44232457890955534, "grad_norm": 0.31141584552794493, "learning_rate": 1.2332379515916436e-05, "loss": 0.1759, "step": 5541 }, { "epoch": 0.44240440648199886, "grad_norm": 0.3181577726798292, "learning_rate": 1.2329865285605542e-05, "loss": 0.1846, "step": 5542 }, { "epoch": 0.4424842340544424, "grad_norm": 0.31762915019805704, "learning_rate": 1.2327350899552373e-05, "loss": 0.1556, "step": 5543 }, { "epoch": 0.44256406162688594, "grad_norm": 0.39148791632571234, "learning_rate": 1.2324836357925003e-05, "loss": 0.1975, "step": 5544 }, { "epoch": 0.44264388919932945, "grad_norm": 0.37464070288551254, "learning_rate": 1.2322321660891526e-05, "loss": 0.1741, "step": 5545 }, { "epoch": 0.44272371677177297, "grad_norm": 0.3131248590582152, "learning_rate": 1.2319806808620034e-05, "loss": 0.2001, "step": 5546 }, { "epoch": 0.4428035443442165, "grad_norm": 0.3234144078163241, "learning_rate": 1.2317291801278638e-05, "loss": 0.1436, "step": 5547 }, { "epoch": 0.44288337191666, "grad_norm": 0.379889281405439, "learning_rate": 1.2314776639035456e-05, "loss": 0.2166, "step": 5548 }, { "epoch": 0.4429631994891035, "grad_norm": 0.3398178481117712, "learning_rate": 1.2312261322058615e-05, "loss": 0.1622, "step": 5549 }, { "epoch": 0.4430430270615471, "grad_norm": 0.2658913788066796, "learning_rate": 1.2309745850516257e-05, "loss": 0.1889, "step": 5550 }, { "epoch": 0.4431228546339906, "grad_norm": 0.3095335533441068, "learning_rate": 1.2307230224576532e-05, "loss": 0.1454, "step": 5551 }, { "epoch": 0.4432026822064341, "grad_norm": 0.32144143147184606, "learning_rate": 1.2304714444407594e-05, "loss": 0.18, "step": 5552 }, { "epoch": 0.4432825097788776, "grad_norm": 0.3654366884443437, "learning_rate": 1.2302198510177614e-05, "loss": 0.1687, "step": 5553 }, { "epoch": 0.44336233735132113, "grad_norm": 0.33674406061629336, "learning_rate": 1.2299682422054777e-05, "loss": 0.1752, "step": 5554 }, { "epoch": 0.44344216492376465, "grad_norm": 0.2689275940185815, "learning_rate": 1.229716618020727e-05, "loss": 0.1361, "step": 5555 }, { "epoch": 0.4435219924962082, "grad_norm": 0.3623881506439358, "learning_rate": 1.2294649784803297e-05, "loss": 0.1497, "step": 5556 }, { "epoch": 0.44360182006865173, "grad_norm": 0.2779289062067917, "learning_rate": 1.2292133236011064e-05, "loss": 0.171, "step": 5557 }, { "epoch": 0.44368164764109524, "grad_norm": 0.2685399244149889, "learning_rate": 1.2289616533998798e-05, "loss": 0.1708, "step": 5558 }, { "epoch": 0.44376147521353876, "grad_norm": 0.3392222471527899, "learning_rate": 1.2287099678934724e-05, "loss": 0.1779, "step": 5559 }, { "epoch": 0.4438413027859823, "grad_norm": 0.36258330328200045, "learning_rate": 1.228458267098709e-05, "loss": 0.1669, "step": 5560 }, { "epoch": 0.4439211303584258, "grad_norm": 0.3435277404212699, "learning_rate": 1.2282065510324144e-05, "loss": 0.1281, "step": 5561 }, { "epoch": 0.4440009579308693, "grad_norm": 0.27174565199746775, "learning_rate": 1.2279548197114153e-05, "loss": 0.201, "step": 5562 }, { "epoch": 0.44408078550331287, "grad_norm": 0.2787564377097676, "learning_rate": 1.2277030731525382e-05, "loss": 0.1813, "step": 5563 }, { "epoch": 0.4441606130757564, "grad_norm": 0.28865114200713665, "learning_rate": 1.227451311372612e-05, "loss": 0.1513, "step": 5564 }, { "epoch": 0.4442404406481999, "grad_norm": 0.2881321930847221, "learning_rate": 1.2271995343884657e-05, "loss": 0.1945, "step": 5565 }, { "epoch": 0.4443202682206434, "grad_norm": 0.3034807168465134, "learning_rate": 1.2269477422169295e-05, "loss": 0.1889, "step": 5566 }, { "epoch": 0.4444000957930869, "grad_norm": 0.2640403075950307, "learning_rate": 1.2266959348748353e-05, "loss": 0.1633, "step": 5567 }, { "epoch": 0.44447992336553044, "grad_norm": 0.3385051716371762, "learning_rate": 1.2264441123790145e-05, "loss": 0.1399, "step": 5568 }, { "epoch": 0.44455975093797395, "grad_norm": 0.3114056790038115, "learning_rate": 1.226192274746301e-05, "loss": 0.1704, "step": 5569 }, { "epoch": 0.4446395785104175, "grad_norm": 0.35030793174170816, "learning_rate": 1.2259404219935292e-05, "loss": 0.1782, "step": 5570 }, { "epoch": 0.44471940608286104, "grad_norm": 0.3942364478033217, "learning_rate": 1.2256885541375345e-05, "loss": 0.1678, "step": 5571 }, { "epoch": 0.44479923365530455, "grad_norm": 0.3033686067034372, "learning_rate": 1.2254366711951527e-05, "loss": 0.1695, "step": 5572 }, { "epoch": 0.44487906122774806, "grad_norm": 0.31785717269268926, "learning_rate": 1.225184773183222e-05, "loss": 0.1923, "step": 5573 }, { "epoch": 0.4449588888001916, "grad_norm": 0.3138070709509947, "learning_rate": 1.2249328601185801e-05, "loss": 0.1831, "step": 5574 }, { "epoch": 0.4450387163726351, "grad_norm": 0.3789369420235182, "learning_rate": 1.2246809320180665e-05, "loss": 0.165, "step": 5575 }, { "epoch": 0.4451185439450786, "grad_norm": 0.27329774156455994, "learning_rate": 1.2244289888985224e-05, "loss": 0.1792, "step": 5576 }, { "epoch": 0.4451983715175222, "grad_norm": 0.3430143311754666, "learning_rate": 1.224177030776788e-05, "loss": 0.2017, "step": 5577 }, { "epoch": 0.4452781990899657, "grad_norm": 0.2919591227387375, "learning_rate": 1.2239250576697064e-05, "loss": 0.1723, "step": 5578 }, { "epoch": 0.4453580266624092, "grad_norm": 0.3319112204467857, "learning_rate": 1.2236730695941205e-05, "loss": 0.1748, "step": 5579 }, { "epoch": 0.4454378542348527, "grad_norm": 0.29267365079117447, "learning_rate": 1.2234210665668758e-05, "loss": 0.1637, "step": 5580 }, { "epoch": 0.44551768180729623, "grad_norm": 0.29971827739466406, "learning_rate": 1.2231690486048164e-05, "loss": 0.1512, "step": 5581 }, { "epoch": 0.44559750937973974, "grad_norm": 0.2908668403250402, "learning_rate": 1.2229170157247897e-05, "loss": 0.1518, "step": 5582 }, { "epoch": 0.4456773369521833, "grad_norm": 0.28375625123108444, "learning_rate": 1.2226649679436421e-05, "loss": 0.1518, "step": 5583 }, { "epoch": 0.4457571645246268, "grad_norm": 0.31098738800442743, "learning_rate": 1.2224129052782233e-05, "loss": 0.1654, "step": 5584 }, { "epoch": 0.44583699209707034, "grad_norm": 0.32758208800813177, "learning_rate": 1.2221608277453814e-05, "loss": 0.1832, "step": 5585 }, { "epoch": 0.44591681966951385, "grad_norm": 0.2833776587991187, "learning_rate": 1.2219087353619675e-05, "loss": 0.1563, "step": 5586 }, { "epoch": 0.44599664724195737, "grad_norm": 0.33916793530565403, "learning_rate": 1.2216566281448328e-05, "loss": 0.1829, "step": 5587 }, { "epoch": 0.4460764748144009, "grad_norm": 0.3134380032990088, "learning_rate": 1.2214045061108298e-05, "loss": 0.1566, "step": 5588 }, { "epoch": 0.4461563023868444, "grad_norm": 0.2658828497507241, "learning_rate": 1.2211523692768116e-05, "loss": 0.1653, "step": 5589 }, { "epoch": 0.44623612995928796, "grad_norm": 0.2846386610067198, "learning_rate": 1.2209002176596329e-05, "loss": 0.1376, "step": 5590 }, { "epoch": 0.4463159575317315, "grad_norm": 0.2692505004003635, "learning_rate": 1.2206480512761487e-05, "loss": 0.1663, "step": 5591 }, { "epoch": 0.446395785104175, "grad_norm": 0.26149820887212644, "learning_rate": 1.2203958701432155e-05, "loss": 0.1833, "step": 5592 }, { "epoch": 0.4464756126766185, "grad_norm": 0.2788540257721896, "learning_rate": 1.2201436742776907e-05, "loss": 0.1589, "step": 5593 }, { "epoch": 0.446555440249062, "grad_norm": 0.28518594799580693, "learning_rate": 1.2198914636964324e-05, "loss": 0.1923, "step": 5594 }, { "epoch": 0.44663526782150553, "grad_norm": 0.3498694494599433, "learning_rate": 1.2196392384163e-05, "loss": 0.1856, "step": 5595 }, { "epoch": 0.44671509539394905, "grad_norm": 0.2867611278114143, "learning_rate": 1.2193869984541535e-05, "loss": 0.1574, "step": 5596 }, { "epoch": 0.4467949229663926, "grad_norm": 0.2933612487084291, "learning_rate": 1.2191347438268548e-05, "loss": 0.1707, "step": 5597 }, { "epoch": 0.44687475053883613, "grad_norm": 0.2809308912141478, "learning_rate": 1.2188824745512654e-05, "loss": 0.1555, "step": 5598 }, { "epoch": 0.44695457811127964, "grad_norm": 0.32478407514296903, "learning_rate": 1.2186301906442483e-05, "loss": 0.178, "step": 5599 }, { "epoch": 0.44703440568372316, "grad_norm": 0.3229023352736412, "learning_rate": 1.2183778921226689e-05, "loss": 0.1532, "step": 5600 }, { "epoch": 0.44711423325616667, "grad_norm": 0.30331919375769895, "learning_rate": 1.2181255790033913e-05, "loss": 0.1743, "step": 5601 }, { "epoch": 0.4471940608286102, "grad_norm": 0.31964346007125677, "learning_rate": 1.217873251303282e-05, "loss": 0.2098, "step": 5602 }, { "epoch": 0.4472738884010537, "grad_norm": 0.2604670597806914, "learning_rate": 1.2176209090392082e-05, "loss": 0.1658, "step": 5603 }, { "epoch": 0.44735371597349727, "grad_norm": 0.3440201179604626, "learning_rate": 1.2173685522280379e-05, "loss": 0.1397, "step": 5604 }, { "epoch": 0.4474335435459408, "grad_norm": 0.3273243136390028, "learning_rate": 1.2171161808866398e-05, "loss": 0.2078, "step": 5605 }, { "epoch": 0.4475133711183843, "grad_norm": 0.2868746666386177, "learning_rate": 1.2168637950318844e-05, "loss": 0.1635, "step": 5606 }, { "epoch": 0.4475931986908278, "grad_norm": 0.2998774784187784, "learning_rate": 1.2166113946806424e-05, "loss": 0.1464, "step": 5607 }, { "epoch": 0.4476730262632713, "grad_norm": 0.2767385810128222, "learning_rate": 1.2163589798497861e-05, "loss": 0.1872, "step": 5608 }, { "epoch": 0.44775285383571484, "grad_norm": 0.27443878241945124, "learning_rate": 1.216106550556188e-05, "loss": 0.1747, "step": 5609 }, { "epoch": 0.44783268140815835, "grad_norm": 0.3331862853260466, "learning_rate": 1.2158541068167224e-05, "loss": 0.1622, "step": 5610 }, { "epoch": 0.4479125089806019, "grad_norm": 0.3019915270398584, "learning_rate": 1.215601648648264e-05, "loss": 0.1756, "step": 5611 }, { "epoch": 0.44799233655304543, "grad_norm": 0.32190836628117575, "learning_rate": 1.2153491760676885e-05, "loss": 0.1433, "step": 5612 }, { "epoch": 0.44807216412548895, "grad_norm": 0.3467194282452991, "learning_rate": 1.215096689091873e-05, "loss": 0.1811, "step": 5613 }, { "epoch": 0.44815199169793246, "grad_norm": 0.32810996019173805, "learning_rate": 1.2148441877376946e-05, "loss": 0.1857, "step": 5614 }, { "epoch": 0.448231819270376, "grad_norm": 0.3069182836167614, "learning_rate": 1.214591672022033e-05, "loss": 0.1323, "step": 5615 }, { "epoch": 0.4483116468428195, "grad_norm": 0.3008407222272413, "learning_rate": 1.2143391419617669e-05, "loss": 0.1806, "step": 5616 }, { "epoch": 0.44839147441526306, "grad_norm": 0.325182718135755, "learning_rate": 1.2140865975737776e-05, "loss": 0.1281, "step": 5617 }, { "epoch": 0.44847130198770657, "grad_norm": 0.27028154304614954, "learning_rate": 1.2138340388749467e-05, "loss": 0.1639, "step": 5618 }, { "epoch": 0.4485511295601501, "grad_norm": 0.28839129029458926, "learning_rate": 1.2135814658821566e-05, "loss": 0.1783, "step": 5619 }, { "epoch": 0.4486309571325936, "grad_norm": 0.3018637323302817, "learning_rate": 1.2133288786122903e-05, "loss": 0.1723, "step": 5620 }, { "epoch": 0.4487107847050371, "grad_norm": 0.3171449930780361, "learning_rate": 1.2130762770822327e-05, "loss": 0.1718, "step": 5621 }, { "epoch": 0.4487906122774806, "grad_norm": 0.2836492592073251, "learning_rate": 1.2128236613088695e-05, "loss": 0.1623, "step": 5622 }, { "epoch": 0.44887043984992414, "grad_norm": 0.2774680085873443, "learning_rate": 1.2125710313090866e-05, "loss": 0.1803, "step": 5623 }, { "epoch": 0.4489502674223677, "grad_norm": 0.290444637595177, "learning_rate": 1.2123183870997716e-05, "loss": 0.1571, "step": 5624 }, { "epoch": 0.4490300949948112, "grad_norm": 0.316144133547127, "learning_rate": 1.2120657286978128e-05, "loss": 0.1759, "step": 5625 }, { "epoch": 0.44910992256725474, "grad_norm": 0.31259546709305236, "learning_rate": 1.2118130561200992e-05, "loss": 0.2032, "step": 5626 }, { "epoch": 0.44918975013969825, "grad_norm": 0.3234554956713654, "learning_rate": 1.2115603693835207e-05, "loss": 0.2063, "step": 5627 }, { "epoch": 0.44926957771214177, "grad_norm": 0.3518307315089583, "learning_rate": 1.2113076685049689e-05, "loss": 0.1927, "step": 5628 }, { "epoch": 0.4493494052845853, "grad_norm": 0.28030368268619416, "learning_rate": 1.2110549535013356e-05, "loss": 0.1883, "step": 5629 }, { "epoch": 0.4494292328570288, "grad_norm": 0.2928863520877898, "learning_rate": 1.2108022243895143e-05, "loss": 0.1505, "step": 5630 }, { "epoch": 0.44950906042947236, "grad_norm": 0.29812074956783097, "learning_rate": 1.210549481186398e-05, "loss": 0.1631, "step": 5631 }, { "epoch": 0.4495888880019159, "grad_norm": 0.2884703697190271, "learning_rate": 1.2102967239088826e-05, "loss": 0.1819, "step": 5632 }, { "epoch": 0.4496687155743594, "grad_norm": 0.34675141082217825, "learning_rate": 1.2100439525738633e-05, "loss": 0.1252, "step": 5633 }, { "epoch": 0.4497485431468029, "grad_norm": 0.27566924015996014, "learning_rate": 1.2097911671982367e-05, "loss": 0.2064, "step": 5634 }, { "epoch": 0.4498283707192464, "grad_norm": 0.29913901496371487, "learning_rate": 1.2095383677989013e-05, "loss": 0.1514, "step": 5635 }, { "epoch": 0.44990819829168993, "grad_norm": 0.3053756191462273, "learning_rate": 1.2092855543927548e-05, "loss": 0.1832, "step": 5636 }, { "epoch": 0.44998802586413345, "grad_norm": 0.33765603648645315, "learning_rate": 1.2090327269966975e-05, "loss": 0.1511, "step": 5637 }, { "epoch": 0.450067853436577, "grad_norm": 0.2708359475976676, "learning_rate": 1.2087798856276293e-05, "loss": 0.205, "step": 5638 }, { "epoch": 0.45014768100902053, "grad_norm": 0.26180755756968893, "learning_rate": 1.2085270303024523e-05, "loss": 0.1858, "step": 5639 }, { "epoch": 0.45022750858146404, "grad_norm": 0.2631323476652577, "learning_rate": 1.2082741610380686e-05, "loss": 0.1381, "step": 5640 }, { "epoch": 0.45030733615390756, "grad_norm": 0.2836177743409817, "learning_rate": 1.2080212778513814e-05, "loss": 0.2118, "step": 5641 }, { "epoch": 0.45038716372635107, "grad_norm": 0.24616668824287913, "learning_rate": 1.2077683807592948e-05, "loss": 0.1791, "step": 5642 }, { "epoch": 0.4504669912987946, "grad_norm": 0.31710960415160755, "learning_rate": 1.2075154697787143e-05, "loss": 0.1836, "step": 5643 }, { "epoch": 0.45054681887123815, "grad_norm": 0.2562145132055253, "learning_rate": 1.2072625449265459e-05, "loss": 0.1896, "step": 5644 }, { "epoch": 0.45062664644368167, "grad_norm": 0.2855483204635222, "learning_rate": 1.2070096062196962e-05, "loss": 0.1509, "step": 5645 }, { "epoch": 0.4507064740161252, "grad_norm": 0.23484704895258446, "learning_rate": 1.2067566536750741e-05, "loss": 0.1507, "step": 5646 }, { "epoch": 0.4507863015885687, "grad_norm": 0.292584429603899, "learning_rate": 1.2065036873095879e-05, "loss": 0.1371, "step": 5647 }, { "epoch": 0.4508661291610122, "grad_norm": 0.30973410191432077, "learning_rate": 1.2062507071401475e-05, "loss": 0.1921, "step": 5648 }, { "epoch": 0.4509459567334557, "grad_norm": 0.30437493572996244, "learning_rate": 1.2059977131836631e-05, "loss": 0.1755, "step": 5649 }, { "epoch": 0.45102578430589924, "grad_norm": 0.3143536506181641, "learning_rate": 1.205744705457047e-05, "loss": 0.1933, "step": 5650 }, { "epoch": 0.4511056118783428, "grad_norm": 0.2784529877272203, "learning_rate": 1.2054916839772116e-05, "loss": 0.1739, "step": 5651 }, { "epoch": 0.4511854394507863, "grad_norm": 0.26375663443939085, "learning_rate": 1.2052386487610703e-05, "loss": 0.1711, "step": 5652 }, { "epoch": 0.45126526702322983, "grad_norm": 0.2920895804008369, "learning_rate": 1.2049855998255376e-05, "loss": 0.1533, "step": 5653 }, { "epoch": 0.45134509459567335, "grad_norm": 0.2912889240736916, "learning_rate": 1.2047325371875286e-05, "loss": 0.1923, "step": 5654 }, { "epoch": 0.45142492216811686, "grad_norm": 0.3416276959561631, "learning_rate": 1.20447946086396e-05, "loss": 0.2311, "step": 5655 }, { "epoch": 0.4515047497405604, "grad_norm": 0.3249495215202874, "learning_rate": 1.2042263708717483e-05, "loss": 0.1304, "step": 5656 }, { "epoch": 0.4515845773130039, "grad_norm": 0.3201591846933308, "learning_rate": 1.203973267227812e-05, "loss": 0.1591, "step": 5657 }, { "epoch": 0.45166440488544746, "grad_norm": 0.26757125447225855, "learning_rate": 1.2037201499490702e-05, "loss": 0.176, "step": 5658 }, { "epoch": 0.45174423245789097, "grad_norm": 0.30432864106317953, "learning_rate": 1.2034670190524427e-05, "loss": 0.2073, "step": 5659 }, { "epoch": 0.4518240600303345, "grad_norm": 0.2865490667930366, "learning_rate": 1.2032138745548497e-05, "loss": 0.1678, "step": 5660 }, { "epoch": 0.451903887602778, "grad_norm": 0.2650716017554703, "learning_rate": 1.2029607164732138e-05, "loss": 0.1872, "step": 5661 }, { "epoch": 0.4519837151752215, "grad_norm": 0.309345602731453, "learning_rate": 1.2027075448244571e-05, "loss": 0.2368, "step": 5662 }, { "epoch": 0.452063542747665, "grad_norm": 0.31485892682423505, "learning_rate": 1.2024543596255032e-05, "loss": 0.1604, "step": 5663 }, { "epoch": 0.45214337032010854, "grad_norm": 0.3025735192906821, "learning_rate": 1.2022011608932765e-05, "loss": 0.1895, "step": 5664 }, { "epoch": 0.4522231978925521, "grad_norm": 0.37940360204326806, "learning_rate": 1.2019479486447025e-05, "loss": 0.1745, "step": 5665 }, { "epoch": 0.4523030254649956, "grad_norm": 0.2964437860483511, "learning_rate": 1.201694722896707e-05, "loss": 0.1779, "step": 5666 }, { "epoch": 0.45238285303743914, "grad_norm": 0.396023020631399, "learning_rate": 1.2014414836662175e-05, "loss": 0.1675, "step": 5667 }, { "epoch": 0.45246268060988265, "grad_norm": 0.25421158148602574, "learning_rate": 1.2011882309701626e-05, "loss": 0.1569, "step": 5668 }, { "epoch": 0.45254250818232616, "grad_norm": 0.2731868978020539, "learning_rate": 1.20093496482547e-05, "loss": 0.1642, "step": 5669 }, { "epoch": 0.4526223357547697, "grad_norm": 0.32722558967294607, "learning_rate": 1.2006816852490708e-05, "loss": 0.1419, "step": 5670 }, { "epoch": 0.4527021633272132, "grad_norm": 0.2841650411533331, "learning_rate": 1.2004283922578948e-05, "loss": 0.1687, "step": 5671 }, { "epoch": 0.45278199089965676, "grad_norm": 0.31570406599658385, "learning_rate": 1.2001750858688738e-05, "loss": 0.1767, "step": 5672 }, { "epoch": 0.4528618184721003, "grad_norm": 0.3334441847431638, "learning_rate": 1.1999217660989406e-05, "loss": 0.1658, "step": 5673 }, { "epoch": 0.4529416460445438, "grad_norm": 0.3070482516138079, "learning_rate": 1.1996684329650286e-05, "loss": 0.1454, "step": 5674 }, { "epoch": 0.4530214736169873, "grad_norm": 0.30378843082606805, "learning_rate": 1.1994150864840718e-05, "loss": 0.1443, "step": 5675 }, { "epoch": 0.4531013011894308, "grad_norm": 0.2902917355625766, "learning_rate": 1.199161726673006e-05, "loss": 0.1617, "step": 5676 }, { "epoch": 0.45318112876187433, "grad_norm": 0.28755652251326946, "learning_rate": 1.1989083535487667e-05, "loss": 0.1958, "step": 5677 }, { "epoch": 0.4532609563343179, "grad_norm": 0.2727004985307684, "learning_rate": 1.1986549671282911e-05, "loss": 0.1756, "step": 5678 }, { "epoch": 0.4533407839067614, "grad_norm": 0.282205211994029, "learning_rate": 1.1984015674285172e-05, "loss": 0.2147, "step": 5679 }, { "epoch": 0.4534206114792049, "grad_norm": 0.2803076946789826, "learning_rate": 1.1981481544663835e-05, "loss": 0.1938, "step": 5680 }, { "epoch": 0.45350043905164844, "grad_norm": 0.2674697735698609, "learning_rate": 1.1978947282588302e-05, "loss": 0.1451, "step": 5681 }, { "epoch": 0.45358026662409195, "grad_norm": 0.3101135517966362, "learning_rate": 1.197641288822797e-05, "loss": 0.1563, "step": 5682 }, { "epoch": 0.45366009419653547, "grad_norm": 0.26196432656449736, "learning_rate": 1.1973878361752263e-05, "loss": 0.159, "step": 5683 }, { "epoch": 0.453739921768979, "grad_norm": 0.3277652854273741, "learning_rate": 1.1971343703330595e-05, "loss": 0.158, "step": 5684 }, { "epoch": 0.45381974934142255, "grad_norm": 0.33363009687667344, "learning_rate": 1.1968808913132407e-05, "loss": 0.1854, "step": 5685 }, { "epoch": 0.45389957691386607, "grad_norm": 0.27927932472439676, "learning_rate": 1.1966273991327128e-05, "loss": 0.1832, "step": 5686 }, { "epoch": 0.4539794044863096, "grad_norm": 0.3442495369052781, "learning_rate": 1.196373893808422e-05, "loss": 0.1895, "step": 5687 }, { "epoch": 0.4540592320587531, "grad_norm": 0.29155148023832217, "learning_rate": 1.1961203753573129e-05, "loss": 0.1229, "step": 5688 }, { "epoch": 0.4541390596311966, "grad_norm": 0.32769456624212956, "learning_rate": 1.1958668437963332e-05, "loss": 0.1654, "step": 5689 }, { "epoch": 0.4542188872036401, "grad_norm": 0.30236626994973825, "learning_rate": 1.19561329914243e-05, "loss": 0.23, "step": 5690 }, { "epoch": 0.45429871477608363, "grad_norm": 0.29595220413015305, "learning_rate": 1.195359741412552e-05, "loss": 0.2281, "step": 5691 }, { "epoch": 0.4543785423485272, "grad_norm": 0.37561419729662554, "learning_rate": 1.1951061706236486e-05, "loss": 0.1966, "step": 5692 }, { "epoch": 0.4544583699209707, "grad_norm": 0.33209120678257326, "learning_rate": 1.1948525867926695e-05, "loss": 0.2153, "step": 5693 }, { "epoch": 0.45453819749341423, "grad_norm": 0.3179660897917326, "learning_rate": 1.1945989899365662e-05, "loss": 0.1288, "step": 5694 }, { "epoch": 0.45461802506585774, "grad_norm": 0.33753853085125857, "learning_rate": 1.1943453800722905e-05, "loss": 0.2114, "step": 5695 }, { "epoch": 0.45469785263830126, "grad_norm": 0.3548372037095227, "learning_rate": 1.1940917572167951e-05, "loss": 0.1583, "step": 5696 }, { "epoch": 0.4547776802107448, "grad_norm": 0.41588087724052514, "learning_rate": 1.1938381213870341e-05, "loss": 0.1558, "step": 5697 }, { "epoch": 0.4548575077831883, "grad_norm": 0.31943503779870286, "learning_rate": 1.1935844725999617e-05, "loss": 0.2149, "step": 5698 }, { "epoch": 0.45493733535563186, "grad_norm": 0.26751665629512306, "learning_rate": 1.1933308108725332e-05, "loss": 0.2183, "step": 5699 }, { "epoch": 0.45501716292807537, "grad_norm": 0.2882114182528824, "learning_rate": 1.1930771362217053e-05, "loss": 0.1771, "step": 5700 }, { "epoch": 0.4550969905005189, "grad_norm": 0.3346238703837364, "learning_rate": 1.192823448664435e-05, "loss": 0.1947, "step": 5701 }, { "epoch": 0.4551768180729624, "grad_norm": 0.47363231853374177, "learning_rate": 1.1925697482176799e-05, "loss": 0.179, "step": 5702 }, { "epoch": 0.4552566456454059, "grad_norm": 0.34431082542805524, "learning_rate": 1.1923160348983995e-05, "loss": 0.1762, "step": 5703 }, { "epoch": 0.4553364732178494, "grad_norm": 0.2542416791792434, "learning_rate": 1.1920623087235532e-05, "loss": 0.1622, "step": 5704 }, { "epoch": 0.455416300790293, "grad_norm": 0.29091534207649183, "learning_rate": 1.1918085697101017e-05, "loss": 0.1942, "step": 5705 }, { "epoch": 0.4554961283627365, "grad_norm": 0.3507154595521883, "learning_rate": 1.191554817875006e-05, "loss": 0.165, "step": 5706 }, { "epoch": 0.45557595593518, "grad_norm": 0.33397981543331995, "learning_rate": 1.1913010532352296e-05, "loss": 0.1805, "step": 5707 }, { "epoch": 0.45565578350762354, "grad_norm": 0.30495315824414415, "learning_rate": 1.1910472758077341e-05, "loss": 0.1751, "step": 5708 }, { "epoch": 0.45573561108006705, "grad_norm": 0.2950687604318254, "learning_rate": 1.1907934856094848e-05, "loss": 0.1604, "step": 5709 }, { "epoch": 0.45581543865251056, "grad_norm": 0.3354825903726733, "learning_rate": 1.1905396826574454e-05, "loss": 0.1949, "step": 5710 }, { "epoch": 0.4558952662249541, "grad_norm": 0.3234428474132661, "learning_rate": 1.1902858669685826e-05, "loss": 0.1991, "step": 5711 }, { "epoch": 0.45597509379739765, "grad_norm": 0.26718720422140607, "learning_rate": 1.1900320385598627e-05, "loss": 0.1943, "step": 5712 }, { "epoch": 0.45605492136984116, "grad_norm": 0.2677119682300074, "learning_rate": 1.1897781974482527e-05, "loss": 0.1987, "step": 5713 }, { "epoch": 0.4561347489422847, "grad_norm": 0.2661303852959154, "learning_rate": 1.189524343650722e-05, "loss": 0.173, "step": 5714 }, { "epoch": 0.4562145765147282, "grad_norm": 0.2780524103696262, "learning_rate": 1.1892704771842383e-05, "loss": 0.2442, "step": 5715 }, { "epoch": 0.4562944040871717, "grad_norm": 0.29434115100263175, "learning_rate": 1.1890165980657724e-05, "loss": 0.1956, "step": 5716 }, { "epoch": 0.4563742316596152, "grad_norm": 0.2740729872756789, "learning_rate": 1.1887627063122948e-05, "loss": 0.1806, "step": 5717 }, { "epoch": 0.45645405923205873, "grad_norm": 0.28208465815440875, "learning_rate": 1.1885088019407776e-05, "loss": 0.148, "step": 5718 }, { "epoch": 0.4565338868045023, "grad_norm": 0.35416154551503914, "learning_rate": 1.1882548849681927e-05, "loss": 0.1729, "step": 5719 }, { "epoch": 0.4566137143769458, "grad_norm": 0.3099340299666091, "learning_rate": 1.188000955411514e-05, "loss": 0.157, "step": 5720 }, { "epoch": 0.4566935419493893, "grad_norm": 0.32607410583411733, "learning_rate": 1.187747013287715e-05, "loss": 0.1591, "step": 5721 }, { "epoch": 0.45677336952183284, "grad_norm": 0.27416090039794166, "learning_rate": 1.1874930586137718e-05, "loss": 0.171, "step": 5722 }, { "epoch": 0.45685319709427635, "grad_norm": 0.30587598137872246, "learning_rate": 1.1872390914066594e-05, "loss": 0.199, "step": 5723 }, { "epoch": 0.45693302466671987, "grad_norm": 0.27918784332167623, "learning_rate": 1.1869851116833544e-05, "loss": 0.2062, "step": 5724 }, { "epoch": 0.4570128522391634, "grad_norm": 0.2986915923037614, "learning_rate": 1.1867311194608351e-05, "loss": 0.1441, "step": 5725 }, { "epoch": 0.45709267981160695, "grad_norm": 0.31797450725758086, "learning_rate": 1.1864771147560789e-05, "loss": 0.1925, "step": 5726 }, { "epoch": 0.45717250738405046, "grad_norm": 0.3279415245307291, "learning_rate": 1.186223097586066e-05, "loss": 0.1776, "step": 5727 }, { "epoch": 0.457252334956494, "grad_norm": 0.30348719238207683, "learning_rate": 1.1859690679677758e-05, "loss": 0.1686, "step": 5728 }, { "epoch": 0.4573321625289375, "grad_norm": 0.32259948155463025, "learning_rate": 1.1857150259181898e-05, "loss": 0.192, "step": 5729 }, { "epoch": 0.457411990101381, "grad_norm": 0.28906256570453215, "learning_rate": 1.1854609714542886e-05, "loss": 0.1599, "step": 5730 }, { "epoch": 0.4574918176738245, "grad_norm": 0.27951316839885404, "learning_rate": 1.185206904593056e-05, "loss": 0.1913, "step": 5731 }, { "epoch": 0.4575716452462681, "grad_norm": 0.2758673429280031, "learning_rate": 1.184952825351474e-05, "loss": 0.1879, "step": 5732 }, { "epoch": 0.4576514728187116, "grad_norm": 0.3238997058155254, "learning_rate": 1.1846987337465283e-05, "loss": 0.146, "step": 5733 }, { "epoch": 0.4577313003911551, "grad_norm": 0.3490059682502447, "learning_rate": 1.1844446297952029e-05, "loss": 0.1401, "step": 5734 }, { "epoch": 0.45781112796359863, "grad_norm": 0.2819916651255119, "learning_rate": 1.1841905135144838e-05, "loss": 0.1702, "step": 5735 }, { "epoch": 0.45789095553604214, "grad_norm": 0.27231205988014756, "learning_rate": 1.1839363849213579e-05, "loss": 0.1708, "step": 5736 }, { "epoch": 0.45797078310848566, "grad_norm": 0.328120922637008, "learning_rate": 1.1836822440328126e-05, "loss": 0.1799, "step": 5737 }, { "epoch": 0.45805061068092917, "grad_norm": 0.2703929350935211, "learning_rate": 1.1834280908658365e-05, "loss": 0.1806, "step": 5738 }, { "epoch": 0.45813043825337274, "grad_norm": 0.34772321901708825, "learning_rate": 1.183173925437418e-05, "loss": 0.2013, "step": 5739 }, { "epoch": 0.45821026582581625, "grad_norm": 0.2860525925300341, "learning_rate": 1.1829197477645476e-05, "loss": 0.1876, "step": 5740 }, { "epoch": 0.45829009339825977, "grad_norm": 0.34530099945799114, "learning_rate": 1.1826655578642157e-05, "loss": 0.1848, "step": 5741 }, { "epoch": 0.4583699209707033, "grad_norm": 0.338105369780413, "learning_rate": 1.1824113557534145e-05, "loss": 0.1859, "step": 5742 }, { "epoch": 0.4584497485431468, "grad_norm": 0.33633704976672485, "learning_rate": 1.1821571414491357e-05, "loss": 0.1485, "step": 5743 }, { "epoch": 0.4585295761155903, "grad_norm": 0.29274920673533084, "learning_rate": 1.1819029149683735e-05, "loss": 0.1793, "step": 5744 }, { "epoch": 0.4586094036880338, "grad_norm": 0.3325513764655201, "learning_rate": 1.1816486763281207e-05, "loss": 0.1752, "step": 5745 }, { "epoch": 0.4586892312604774, "grad_norm": 0.2538073791024761, "learning_rate": 1.1813944255453728e-05, "loss": 0.2064, "step": 5746 }, { "epoch": 0.4587690588329209, "grad_norm": 0.2647234738563114, "learning_rate": 1.1811401626371259e-05, "loss": 0.1621, "step": 5747 }, { "epoch": 0.4588488864053644, "grad_norm": 0.321756075816328, "learning_rate": 1.1808858876203756e-05, "loss": 0.17, "step": 5748 }, { "epoch": 0.45892871397780793, "grad_norm": 0.284821860560354, "learning_rate": 1.1806316005121196e-05, "loss": 0.2482, "step": 5749 }, { "epoch": 0.45900854155025145, "grad_norm": 0.2850182086767001, "learning_rate": 1.1803773013293559e-05, "loss": 0.1822, "step": 5750 }, { "epoch": 0.45908836912269496, "grad_norm": 0.2922481525049528, "learning_rate": 1.1801229900890833e-05, "loss": 0.2077, "step": 5751 }, { "epoch": 0.4591681966951385, "grad_norm": 0.40088993839170645, "learning_rate": 1.179868666808302e-05, "loss": 0.2383, "step": 5752 }, { "epoch": 0.45924802426758204, "grad_norm": 0.2900693678354874, "learning_rate": 1.179614331504012e-05, "loss": 0.1745, "step": 5753 }, { "epoch": 0.45932785184002556, "grad_norm": 0.25112313946174813, "learning_rate": 1.1793599841932146e-05, "loss": 0.206, "step": 5754 }, { "epoch": 0.45940767941246907, "grad_norm": 0.29648563566411795, "learning_rate": 1.1791056248929123e-05, "loss": 0.1897, "step": 5755 }, { "epoch": 0.4594875069849126, "grad_norm": 0.27749877520902727, "learning_rate": 1.1788512536201075e-05, "loss": 0.1599, "step": 5756 }, { "epoch": 0.4595673345573561, "grad_norm": 0.3936868867796463, "learning_rate": 1.1785968703918045e-05, "loss": 0.2108, "step": 5757 }, { "epoch": 0.4596471621297996, "grad_norm": 0.34414313235635274, "learning_rate": 1.178342475225007e-05, "loss": 0.1444, "step": 5758 }, { "epoch": 0.4597269897022431, "grad_norm": 0.26376784719840896, "learning_rate": 1.1780880681367212e-05, "loss": 0.193, "step": 5759 }, { "epoch": 0.4598068172746867, "grad_norm": 0.3236975248464826, "learning_rate": 1.1778336491439531e-05, "loss": 0.2204, "step": 5760 }, { "epoch": 0.4598866448471302, "grad_norm": 0.29954349153494947, "learning_rate": 1.1775792182637086e-05, "loss": 0.1763, "step": 5761 }, { "epoch": 0.4599664724195737, "grad_norm": 0.26417019336755265, "learning_rate": 1.1773247755129967e-05, "loss": 0.1783, "step": 5762 }, { "epoch": 0.46004629999201724, "grad_norm": 0.2635345994995306, "learning_rate": 1.1770703209088247e-05, "loss": 0.1808, "step": 5763 }, { "epoch": 0.46012612756446075, "grad_norm": 0.28049478915611537, "learning_rate": 1.1768158544682032e-05, "loss": 0.159, "step": 5764 }, { "epoch": 0.46020595513690427, "grad_norm": 0.2482351376727927, "learning_rate": 1.176561376208141e-05, "loss": 0.1283, "step": 5765 }, { "epoch": 0.46028578270934783, "grad_norm": 0.254022949546793, "learning_rate": 1.1763068861456499e-05, "loss": 0.1661, "step": 5766 }, { "epoch": 0.46036561028179135, "grad_norm": 0.3084767846022737, "learning_rate": 1.1760523842977411e-05, "loss": 0.1161, "step": 5767 }, { "epoch": 0.46044543785423486, "grad_norm": 0.3436421659941617, "learning_rate": 1.1757978706814268e-05, "loss": 0.2485, "step": 5768 }, { "epoch": 0.4605252654266784, "grad_norm": 0.2876068683669589, "learning_rate": 1.175543345313721e-05, "loss": 0.1783, "step": 5769 }, { "epoch": 0.4606050929991219, "grad_norm": 0.31103558588967384, "learning_rate": 1.1752888082116369e-05, "loss": 0.168, "step": 5770 }, { "epoch": 0.4606849205715654, "grad_norm": 0.3276386393260851, "learning_rate": 1.17503425939219e-05, "loss": 0.1967, "step": 5771 }, { "epoch": 0.4607647481440089, "grad_norm": 0.3260412525152908, "learning_rate": 1.1747796988723952e-05, "loss": 0.1517, "step": 5772 }, { "epoch": 0.4608445757164525, "grad_norm": 0.3260458453449303, "learning_rate": 1.1745251266692694e-05, "loss": 0.1846, "step": 5773 }, { "epoch": 0.460924403288896, "grad_norm": 0.29776834880771824, "learning_rate": 1.1742705427998298e-05, "loss": 0.1926, "step": 5774 }, { "epoch": 0.4610042308613395, "grad_norm": 0.2928746007119268, "learning_rate": 1.174015947281094e-05, "loss": 0.1431, "step": 5775 }, { "epoch": 0.46108405843378303, "grad_norm": 0.28265577474984194, "learning_rate": 1.1737613401300803e-05, "loss": 0.1332, "step": 5776 }, { "epoch": 0.46116388600622654, "grad_norm": 0.29975456014392027, "learning_rate": 1.1735067213638091e-05, "loss": 0.1933, "step": 5777 }, { "epoch": 0.46124371357867006, "grad_norm": 0.25395660910814705, "learning_rate": 1.1732520909992999e-05, "loss": 0.157, "step": 5778 }, { "epoch": 0.46132354115111357, "grad_norm": 0.34317281770876734, "learning_rate": 1.1729974490535743e-05, "loss": 0.1806, "step": 5779 }, { "epoch": 0.46140336872355714, "grad_norm": 0.30419703753912114, "learning_rate": 1.1727427955436537e-05, "loss": 0.2454, "step": 5780 }, { "epoch": 0.46148319629600065, "grad_norm": 0.2975432363354451, "learning_rate": 1.172488130486561e-05, "loss": 0.196, "step": 5781 }, { "epoch": 0.46156302386844417, "grad_norm": 0.33171821152442754, "learning_rate": 1.1722334538993195e-05, "loss": 0.1565, "step": 5782 }, { "epoch": 0.4616428514408877, "grad_norm": 0.3380862880915995, "learning_rate": 1.1719787657989527e-05, "loss": 0.1596, "step": 5783 }, { "epoch": 0.4617226790133312, "grad_norm": 0.2979639671690904, "learning_rate": 1.1717240662024864e-05, "loss": 0.1484, "step": 5784 }, { "epoch": 0.4618025065857747, "grad_norm": 0.30402579314761163, "learning_rate": 1.1714693551269456e-05, "loss": 0.1774, "step": 5785 }, { "epoch": 0.4618823341582182, "grad_norm": 0.29754287205124996, "learning_rate": 1.1712146325893573e-05, "loss": 0.1349, "step": 5786 }, { "epoch": 0.4619621617306618, "grad_norm": 0.2963256399076933, "learning_rate": 1.1709598986067481e-05, "loss": 0.1495, "step": 5787 }, { "epoch": 0.4620419893031053, "grad_norm": 0.3104414585873915, "learning_rate": 1.1707051531961464e-05, "loss": 0.1929, "step": 5788 }, { "epoch": 0.4621218168755488, "grad_norm": 0.33447560760886796, "learning_rate": 1.170450396374581e-05, "loss": 0.1482, "step": 5789 }, { "epoch": 0.46220164444799233, "grad_norm": 0.3143649208215378, "learning_rate": 1.1701956281590806e-05, "loss": 0.2012, "step": 5790 }, { "epoch": 0.46228147202043585, "grad_norm": 0.2501308003911753, "learning_rate": 1.1699408485666763e-05, "loss": 0.1573, "step": 5791 }, { "epoch": 0.46236129959287936, "grad_norm": 0.2689575058469877, "learning_rate": 1.1696860576143986e-05, "loss": 0.1318, "step": 5792 }, { "epoch": 0.46244112716532293, "grad_norm": 0.30035302682344456, "learning_rate": 1.16943125531928e-05, "loss": 0.2077, "step": 5793 }, { "epoch": 0.46252095473776644, "grad_norm": 0.2706212213970817, "learning_rate": 1.169176441698352e-05, "loss": 0.2, "step": 5794 }, { "epoch": 0.46260078231020996, "grad_norm": 0.2928128377735869, "learning_rate": 1.1689216167686486e-05, "loss": 0.2003, "step": 5795 }, { "epoch": 0.46268060988265347, "grad_norm": 0.2666474547602143, "learning_rate": 1.1686667805472035e-05, "loss": 0.189, "step": 5796 }, { "epoch": 0.462760437455097, "grad_norm": 0.3022954914876019, "learning_rate": 1.1684119330510517e-05, "loss": 0.1885, "step": 5797 }, { "epoch": 0.4628402650275405, "grad_norm": 0.3087530062000063, "learning_rate": 1.1681570742972283e-05, "loss": 0.1637, "step": 5798 }, { "epoch": 0.462920092599984, "grad_norm": 0.32053671621132784, "learning_rate": 1.1679022043027706e-05, "loss": 0.2236, "step": 5799 }, { "epoch": 0.4629999201724276, "grad_norm": 0.2878758218744173, "learning_rate": 1.1676473230847143e-05, "loss": 0.1662, "step": 5800 }, { "epoch": 0.4630797477448711, "grad_norm": 0.33416565885525695, "learning_rate": 1.1673924306600983e-05, "loss": 0.1824, "step": 5801 }, { "epoch": 0.4631595753173146, "grad_norm": 0.3290703658820812, "learning_rate": 1.1671375270459607e-05, "loss": 0.1733, "step": 5802 }, { "epoch": 0.4632394028897581, "grad_norm": 0.29395817813838654, "learning_rate": 1.1668826122593408e-05, "loss": 0.1414, "step": 5803 }, { "epoch": 0.46331923046220164, "grad_norm": 0.3025076693305289, "learning_rate": 1.1666276863172789e-05, "loss": 0.1751, "step": 5804 }, { "epoch": 0.46339905803464515, "grad_norm": 0.35267399269544936, "learning_rate": 1.1663727492368154e-05, "loss": 0.1713, "step": 5805 }, { "epoch": 0.46347888560708866, "grad_norm": 0.3007887104773692, "learning_rate": 1.166117801034992e-05, "loss": 0.1546, "step": 5806 }, { "epoch": 0.46355871317953223, "grad_norm": 0.2969633749838083, "learning_rate": 1.1658628417288508e-05, "loss": 0.1605, "step": 5807 }, { "epoch": 0.46363854075197575, "grad_norm": 0.29749715723022613, "learning_rate": 1.1656078713354355e-05, "loss": 0.1976, "step": 5808 }, { "epoch": 0.46371836832441926, "grad_norm": 0.3002509299967162, "learning_rate": 1.1653528898717889e-05, "loss": 0.1729, "step": 5809 }, { "epoch": 0.4637981958968628, "grad_norm": 0.3312246175252338, "learning_rate": 1.1650978973549565e-05, "loss": 0.1891, "step": 5810 }, { "epoch": 0.4638780234693063, "grad_norm": 0.25358833513869045, "learning_rate": 1.1648428938019827e-05, "loss": 0.1943, "step": 5811 }, { "epoch": 0.4639578510417498, "grad_norm": 0.26842458448491424, "learning_rate": 1.164587879229914e-05, "loss": 0.1852, "step": 5812 }, { "epoch": 0.4640376786141933, "grad_norm": 0.2856513175771553, "learning_rate": 1.1643328536557968e-05, "loss": 0.1691, "step": 5813 }, { "epoch": 0.4641175061866369, "grad_norm": 0.3022092485916768, "learning_rate": 1.1640778170966782e-05, "loss": 0.1768, "step": 5814 }, { "epoch": 0.4641973337590804, "grad_norm": 0.33218796393315364, "learning_rate": 1.1638227695696076e-05, "loss": 0.1659, "step": 5815 }, { "epoch": 0.4642771613315239, "grad_norm": 0.32360737589400573, "learning_rate": 1.1635677110916327e-05, "loss": 0.2069, "step": 5816 }, { "epoch": 0.4643569889039674, "grad_norm": 0.2952107082524862, "learning_rate": 1.1633126416798037e-05, "loss": 0.1741, "step": 5817 }, { "epoch": 0.46443681647641094, "grad_norm": 0.29833438710044974, "learning_rate": 1.163057561351171e-05, "loss": 0.1355, "step": 5818 }, { "epoch": 0.46451664404885445, "grad_norm": 0.30504378911710417, "learning_rate": 1.1628024701227859e-05, "loss": 0.2077, "step": 5819 }, { "epoch": 0.464596471621298, "grad_norm": 0.29924926273287483, "learning_rate": 1.1625473680116994e-05, "loss": 0.1771, "step": 5820 }, { "epoch": 0.46467629919374154, "grad_norm": 0.41483391002598796, "learning_rate": 1.1622922550349651e-05, "loss": 0.1506, "step": 5821 }, { "epoch": 0.46475612676618505, "grad_norm": 0.30088735800063654, "learning_rate": 1.1620371312096354e-05, "loss": 0.1522, "step": 5822 }, { "epoch": 0.46483595433862857, "grad_norm": 0.2699369521356308, "learning_rate": 1.161781996552765e-05, "loss": 0.189, "step": 5823 }, { "epoch": 0.4649157819110721, "grad_norm": 0.27935030998922655, "learning_rate": 1.1615268510814082e-05, "loss": 0.1694, "step": 5824 }, { "epoch": 0.4649956094835156, "grad_norm": 0.3027522139081655, "learning_rate": 1.1612716948126206e-05, "loss": 0.1916, "step": 5825 }, { "epoch": 0.4650754370559591, "grad_norm": 0.28292196880626874, "learning_rate": 1.1610165277634586e-05, "loss": 0.1911, "step": 5826 }, { "epoch": 0.4651552646284027, "grad_norm": 0.3562480804759518, "learning_rate": 1.1607613499509789e-05, "loss": 0.2211, "step": 5827 }, { "epoch": 0.4652350922008462, "grad_norm": 0.38211196164394695, "learning_rate": 1.160506161392239e-05, "loss": 0.1678, "step": 5828 }, { "epoch": 0.4653149197732897, "grad_norm": 0.31628332259129494, "learning_rate": 1.1602509621042974e-05, "loss": 0.1324, "step": 5829 }, { "epoch": 0.4653947473457332, "grad_norm": 0.2521143276839069, "learning_rate": 1.1599957521042136e-05, "loss": 0.2166, "step": 5830 }, { "epoch": 0.46547457491817673, "grad_norm": 0.3341946189849986, "learning_rate": 1.1597405314090465e-05, "loss": 0.1635, "step": 5831 }, { "epoch": 0.46555440249062024, "grad_norm": 0.3891451221245055, "learning_rate": 1.159485300035857e-05, "loss": 0.1945, "step": 5832 }, { "epoch": 0.46563423006306376, "grad_norm": 0.3010922894818922, "learning_rate": 1.1592300580017063e-05, "loss": 0.193, "step": 5833 }, { "epoch": 0.46571405763550733, "grad_norm": 0.28880466890324596, "learning_rate": 1.158974805323657e-05, "loss": 0.1696, "step": 5834 }, { "epoch": 0.46579388520795084, "grad_norm": 0.3055879886492989, "learning_rate": 1.1587195420187705e-05, "loss": 0.1428, "step": 5835 }, { "epoch": 0.46587371278039436, "grad_norm": 0.28902407290988136, "learning_rate": 1.1584642681041108e-05, "loss": 0.183, "step": 5836 }, { "epoch": 0.46595354035283787, "grad_norm": 0.336894079324021, "learning_rate": 1.1582089835967421e-05, "loss": 0.1516, "step": 5837 }, { "epoch": 0.4660333679252814, "grad_norm": 0.3003699621335657, "learning_rate": 1.1579536885137285e-05, "loss": 0.1662, "step": 5838 }, { "epoch": 0.4661131954977249, "grad_norm": 0.2946468328674599, "learning_rate": 1.1576983828721365e-05, "loss": 0.177, "step": 5839 }, { "epoch": 0.4661930230701684, "grad_norm": 0.3456302534307606, "learning_rate": 1.1574430666890313e-05, "loss": 0.1508, "step": 5840 }, { "epoch": 0.466272850642612, "grad_norm": 0.31628039380252293, "learning_rate": 1.1571877399814807e-05, "loss": 0.1519, "step": 5841 }, { "epoch": 0.4663526782150555, "grad_norm": 0.34074263268614924, "learning_rate": 1.1569324027665512e-05, "loss": 0.1853, "step": 5842 }, { "epoch": 0.466432505787499, "grad_norm": 0.2853178684614664, "learning_rate": 1.1566770550613117e-05, "loss": 0.1906, "step": 5843 }, { "epoch": 0.4665123333599425, "grad_norm": 0.3505995502196553, "learning_rate": 1.156421696882831e-05, "loss": 0.1872, "step": 5844 }, { "epoch": 0.46659216093238604, "grad_norm": 0.305272225415463, "learning_rate": 1.1561663282481792e-05, "loss": 0.1864, "step": 5845 }, { "epoch": 0.46667198850482955, "grad_norm": 0.3060883359341392, "learning_rate": 1.155910949174426e-05, "loss": 0.1539, "step": 5846 }, { "epoch": 0.46675181607727306, "grad_norm": 0.2819183531810241, "learning_rate": 1.1556555596786432e-05, "loss": 0.1347, "step": 5847 }, { "epoch": 0.46683164364971663, "grad_norm": 0.27673481483020984, "learning_rate": 1.155400159777902e-05, "loss": 0.2032, "step": 5848 }, { "epoch": 0.46691147122216015, "grad_norm": 0.33575078476255765, "learning_rate": 1.1551447494892751e-05, "loss": 0.1764, "step": 5849 }, { "epoch": 0.46699129879460366, "grad_norm": 0.3075560354612669, "learning_rate": 1.154889328829836e-05, "loss": 0.2008, "step": 5850 }, { "epoch": 0.4670711263670472, "grad_norm": 0.31186406445590675, "learning_rate": 1.1546338978166579e-05, "loss": 0.188, "step": 5851 }, { "epoch": 0.4671509539394907, "grad_norm": 0.31316784426135347, "learning_rate": 1.1543784564668159e-05, "loss": 0.1805, "step": 5852 }, { "epoch": 0.4672307815119342, "grad_norm": 0.2804051286152254, "learning_rate": 1.1541230047973846e-05, "loss": 0.2139, "step": 5853 }, { "epoch": 0.46731060908437777, "grad_norm": 0.28276616024462264, "learning_rate": 1.1538675428254408e-05, "loss": 0.155, "step": 5854 }, { "epoch": 0.4673904366568213, "grad_norm": 0.33406348187844265, "learning_rate": 1.1536120705680605e-05, "loss": 0.1476, "step": 5855 }, { "epoch": 0.4674702642292648, "grad_norm": 0.4859299970269641, "learning_rate": 1.1533565880423211e-05, "loss": 0.1578, "step": 5856 }, { "epoch": 0.4675500918017083, "grad_norm": 0.2921284809951522, "learning_rate": 1.153101095265301e-05, "loss": 0.1672, "step": 5857 }, { "epoch": 0.4676299193741518, "grad_norm": 0.3146489100594675, "learning_rate": 1.1528455922540785e-05, "loss": 0.153, "step": 5858 }, { "epoch": 0.46770974694659534, "grad_norm": 0.32727850903233824, "learning_rate": 1.152590079025733e-05, "loss": 0.1702, "step": 5859 }, { "epoch": 0.46778957451903885, "grad_norm": 0.3173261035876984, "learning_rate": 1.1523345555973441e-05, "loss": 0.1734, "step": 5860 }, { "epoch": 0.4678694020914824, "grad_norm": 0.3864873791332544, "learning_rate": 1.1520790219859937e-05, "loss": 0.1603, "step": 5861 }, { "epoch": 0.46794922966392594, "grad_norm": 0.3216481342197749, "learning_rate": 1.151823478208762e-05, "loss": 0.1315, "step": 5862 }, { "epoch": 0.46802905723636945, "grad_norm": 0.27898120761031153, "learning_rate": 1.1515679242827322e-05, "loss": 0.191, "step": 5863 }, { "epoch": 0.46810888480881296, "grad_norm": 0.30307403073202194, "learning_rate": 1.1513123602249861e-05, "loss": 0.2569, "step": 5864 }, { "epoch": 0.4681887123812565, "grad_norm": 0.3112637331209269, "learning_rate": 1.1510567860526076e-05, "loss": 0.2121, "step": 5865 }, { "epoch": 0.4682685399537, "grad_norm": 0.3164386302312524, "learning_rate": 1.1508012017826806e-05, "loss": 0.164, "step": 5866 }, { "epoch": 0.4683483675261435, "grad_norm": 0.3692190148295372, "learning_rate": 1.1505456074322902e-05, "loss": 0.1824, "step": 5867 }, { "epoch": 0.4684281950985871, "grad_norm": 0.3270696361953216, "learning_rate": 1.1502900030185216e-05, "loss": 0.1612, "step": 5868 }, { "epoch": 0.4685080226710306, "grad_norm": 0.25770552441078576, "learning_rate": 1.150034388558461e-05, "loss": 0.1837, "step": 5869 }, { "epoch": 0.4685878502434741, "grad_norm": 0.3433035636235256, "learning_rate": 1.1497787640691955e-05, "loss": 0.127, "step": 5870 }, { "epoch": 0.4686676778159176, "grad_norm": 0.3902803501695699, "learning_rate": 1.1495231295678122e-05, "loss": 0.1813, "step": 5871 }, { "epoch": 0.46874750538836113, "grad_norm": 0.3833665905207288, "learning_rate": 1.1492674850713996e-05, "loss": 0.1634, "step": 5872 }, { "epoch": 0.46882733296080464, "grad_norm": 0.26615405863494856, "learning_rate": 1.1490118305970462e-05, "loss": 0.1723, "step": 5873 }, { "epoch": 0.46890716053324816, "grad_norm": 0.283313686789244, "learning_rate": 1.1487561661618416e-05, "loss": 0.1748, "step": 5874 }, { "epoch": 0.4689869881056917, "grad_norm": 0.2709085219687263, "learning_rate": 1.148500491782876e-05, "loss": 0.191, "step": 5875 }, { "epoch": 0.46906681567813524, "grad_norm": 0.34109977077205467, "learning_rate": 1.1482448074772403e-05, "loss": 0.1427, "step": 5876 }, { "epoch": 0.46914664325057875, "grad_norm": 0.29253201869673695, "learning_rate": 1.1479891132620256e-05, "loss": 0.1584, "step": 5877 }, { "epoch": 0.46922647082302227, "grad_norm": 0.2311414329668339, "learning_rate": 1.1477334091543249e-05, "loss": 0.1933, "step": 5878 }, { "epoch": 0.4693062983954658, "grad_norm": 0.32753222858872, "learning_rate": 1.14747769517123e-05, "loss": 0.217, "step": 5879 }, { "epoch": 0.4693861259679093, "grad_norm": 0.29136356624514453, "learning_rate": 1.1472219713298348e-05, "loss": 0.1877, "step": 5880 }, { "epoch": 0.46946595354035286, "grad_norm": 0.32127721879324483, "learning_rate": 1.1469662376472334e-05, "loss": 0.1633, "step": 5881 }, { "epoch": 0.4695457811127964, "grad_norm": 0.31631381323271646, "learning_rate": 1.1467104941405208e-05, "loss": 0.2036, "step": 5882 }, { "epoch": 0.4696256086852399, "grad_norm": 0.3179484464761021, "learning_rate": 1.1464547408267923e-05, "loss": 0.2321, "step": 5883 }, { "epoch": 0.4697054362576834, "grad_norm": 0.27942698014471146, "learning_rate": 1.1461989777231437e-05, "loss": 0.1892, "step": 5884 }, { "epoch": 0.4697852638301269, "grad_norm": 0.2951162099722957, "learning_rate": 1.145943204846672e-05, "loss": 0.1835, "step": 5885 }, { "epoch": 0.46986509140257043, "grad_norm": 0.27877720884892127, "learning_rate": 1.1456874222144752e-05, "loss": 0.1842, "step": 5886 }, { "epoch": 0.46994491897501395, "grad_norm": 0.3166979019327324, "learning_rate": 1.1454316298436503e-05, "loss": 0.1644, "step": 5887 }, { "epoch": 0.4700247465474575, "grad_norm": 0.30986108391709927, "learning_rate": 1.1451758277512965e-05, "loss": 0.1768, "step": 5888 }, { "epoch": 0.47010457411990103, "grad_norm": 0.37300526958891445, "learning_rate": 1.1449200159545132e-05, "loss": 0.1865, "step": 5889 }, { "epoch": 0.47018440169234454, "grad_norm": 0.30711803346739874, "learning_rate": 1.1446641944704002e-05, "loss": 0.1658, "step": 5890 }, { "epoch": 0.47026422926478806, "grad_norm": 0.31627477373962426, "learning_rate": 1.1444083633160585e-05, "loss": 0.1641, "step": 5891 }, { "epoch": 0.47034405683723157, "grad_norm": 0.28637941603865164, "learning_rate": 1.1441525225085889e-05, "loss": 0.1718, "step": 5892 }, { "epoch": 0.4704238844096751, "grad_norm": 0.2815861438311914, "learning_rate": 1.1438966720650941e-05, "loss": 0.1998, "step": 5893 }, { "epoch": 0.4705037119821186, "grad_norm": 0.28837476434954074, "learning_rate": 1.143640812002676e-05, "loss": 0.1352, "step": 5894 }, { "epoch": 0.47058353955456217, "grad_norm": 0.28520026787273844, "learning_rate": 1.143384942338438e-05, "loss": 0.1502, "step": 5895 }, { "epoch": 0.4706633671270057, "grad_norm": 0.3597191797883546, "learning_rate": 1.1431290630894843e-05, "loss": 0.1699, "step": 5896 }, { "epoch": 0.4707431946994492, "grad_norm": 0.2708083937916248, "learning_rate": 1.1428731742729186e-05, "loss": 0.1477, "step": 5897 }, { "epoch": 0.4708230222718927, "grad_norm": 0.28507203961446226, "learning_rate": 1.1426172759058471e-05, "loss": 0.1838, "step": 5898 }, { "epoch": 0.4709028498443362, "grad_norm": 0.3392533232528187, "learning_rate": 1.1423613680053748e-05, "loss": 0.1463, "step": 5899 }, { "epoch": 0.47098267741677974, "grad_norm": 0.2977134538713612, "learning_rate": 1.1421054505886088e-05, "loss": 0.1634, "step": 5900 }, { "epoch": 0.47106250498922325, "grad_norm": 0.30593925592025845, "learning_rate": 1.1418495236726556e-05, "loss": 0.1988, "step": 5901 }, { "epoch": 0.4711423325616668, "grad_norm": 0.28152093841949005, "learning_rate": 1.1415935872746233e-05, "loss": 0.1635, "step": 5902 }, { "epoch": 0.47122216013411033, "grad_norm": 0.2810673172829312, "learning_rate": 1.1413376414116198e-05, "loss": 0.1837, "step": 5903 }, { "epoch": 0.47130198770655385, "grad_norm": 0.3337012256086905, "learning_rate": 1.141081686100754e-05, "loss": 0.1479, "step": 5904 }, { "epoch": 0.47138181527899736, "grad_norm": 0.3012257987241976, "learning_rate": 1.1408257213591363e-05, "loss": 0.1429, "step": 5905 }, { "epoch": 0.4714616428514409, "grad_norm": 0.30429486804644174, "learning_rate": 1.1405697472038761e-05, "loss": 0.1704, "step": 5906 }, { "epoch": 0.4715414704238844, "grad_norm": 0.33480863026209107, "learning_rate": 1.1403137636520849e-05, "loss": 0.1901, "step": 5907 }, { "epoch": 0.4716212979963279, "grad_norm": 0.29548440149933186, "learning_rate": 1.140057770720874e-05, "loss": 0.2295, "step": 5908 }, { "epoch": 0.4717011255687715, "grad_norm": 0.2715556093179177, "learning_rate": 1.1398017684273552e-05, "loss": 0.1366, "step": 5909 }, { "epoch": 0.471780953141215, "grad_norm": 0.321219196413249, "learning_rate": 1.1395457567886413e-05, "loss": 0.1946, "step": 5910 }, { "epoch": 0.4718607807136585, "grad_norm": 0.26399088866092235, "learning_rate": 1.139289735821846e-05, "loss": 0.1783, "step": 5911 }, { "epoch": 0.471940608286102, "grad_norm": 0.3079225312315165, "learning_rate": 1.1390337055440828e-05, "loss": 0.1987, "step": 5912 }, { "epoch": 0.47202043585854553, "grad_norm": 0.3193001719438286, "learning_rate": 1.1387776659724668e-05, "loss": 0.1516, "step": 5913 }, { "epoch": 0.47210026343098904, "grad_norm": 0.2963690854337454, "learning_rate": 1.1385216171241128e-05, "loss": 0.1419, "step": 5914 }, { "epoch": 0.4721800910034326, "grad_norm": 0.3104320409847908, "learning_rate": 1.138265559016137e-05, "loss": 0.1711, "step": 5915 }, { "epoch": 0.4722599185758761, "grad_norm": 0.28748476922567595, "learning_rate": 1.1380094916656562e-05, "loss": 0.1791, "step": 5916 }, { "epoch": 0.47233974614831964, "grad_norm": 0.345922362843922, "learning_rate": 1.1377534150897865e-05, "loss": 0.1716, "step": 5917 }, { "epoch": 0.47241957372076315, "grad_norm": 0.3305225658928978, "learning_rate": 1.1374973293056464e-05, "loss": 0.1345, "step": 5918 }, { "epoch": 0.47249940129320667, "grad_norm": 0.26498277465576364, "learning_rate": 1.1372412343303539e-05, "loss": 0.1742, "step": 5919 }, { "epoch": 0.4725792288656502, "grad_norm": 0.3246719136094782, "learning_rate": 1.1369851301810279e-05, "loss": 0.1435, "step": 5920 }, { "epoch": 0.4726590564380937, "grad_norm": 0.3230970152769175, "learning_rate": 1.1367290168747882e-05, "loss": 0.1574, "step": 5921 }, { "epoch": 0.47273888401053726, "grad_norm": 0.3398003723986526, "learning_rate": 1.1364728944287549e-05, "loss": 0.1499, "step": 5922 }, { "epoch": 0.4728187115829808, "grad_norm": 0.30501784688058514, "learning_rate": 1.1362167628600486e-05, "loss": 0.1924, "step": 5923 }, { "epoch": 0.4728985391554243, "grad_norm": 0.26860949602795225, "learning_rate": 1.1359606221857909e-05, "loss": 0.195, "step": 5924 }, { "epoch": 0.4729783667278678, "grad_norm": 0.30781115900640976, "learning_rate": 1.1357044724231032e-05, "loss": 0.1748, "step": 5925 }, { "epoch": 0.4730581943003113, "grad_norm": 0.3144666036849111, "learning_rate": 1.1354483135891088e-05, "loss": 0.199, "step": 5926 }, { "epoch": 0.47313802187275483, "grad_norm": 0.2780942592867631, "learning_rate": 1.135192145700931e-05, "loss": 0.1518, "step": 5927 }, { "epoch": 0.47321784944519835, "grad_norm": 0.31047326794453195, "learning_rate": 1.1349359687756927e-05, "loss": 0.1705, "step": 5928 }, { "epoch": 0.4732976770176419, "grad_norm": 0.3144697982146468, "learning_rate": 1.1346797828305194e-05, "loss": 0.2515, "step": 5929 }, { "epoch": 0.47337750459008543, "grad_norm": 0.3352410346628283, "learning_rate": 1.1344235878825354e-05, "loss": 0.1772, "step": 5930 }, { "epoch": 0.47345733216252894, "grad_norm": 0.2924986276097499, "learning_rate": 1.1341673839488665e-05, "loss": 0.1553, "step": 5931 }, { "epoch": 0.47353715973497246, "grad_norm": 0.2978838663258132, "learning_rate": 1.1339111710466389e-05, "loss": 0.2039, "step": 5932 }, { "epoch": 0.47361698730741597, "grad_norm": 0.31268470234322954, "learning_rate": 1.1336549491929796e-05, "loss": 0.205, "step": 5933 }, { "epoch": 0.4736968148798595, "grad_norm": 0.2851941627130424, "learning_rate": 1.1333987184050157e-05, "loss": 0.1623, "step": 5934 }, { "epoch": 0.473776642452303, "grad_norm": 0.2627174770560712, "learning_rate": 1.1331424786998756e-05, "loss": 0.1708, "step": 5935 }, { "epoch": 0.47385647002474657, "grad_norm": 0.2742475730837184, "learning_rate": 1.1328862300946875e-05, "loss": 0.1917, "step": 5936 }, { "epoch": 0.4739362975971901, "grad_norm": 0.33794019441906126, "learning_rate": 1.1326299726065808e-05, "loss": 0.1566, "step": 5937 }, { "epoch": 0.4740161251696336, "grad_norm": 0.3129613603828414, "learning_rate": 1.1323737062526853e-05, "loss": 0.1595, "step": 5938 }, { "epoch": 0.4740959527420771, "grad_norm": 0.34868198567789693, "learning_rate": 1.1321174310501314e-05, "loss": 0.2164, "step": 5939 }, { "epoch": 0.4741757803145206, "grad_norm": 0.2978224114736684, "learning_rate": 1.1318611470160503e-05, "loss": 0.1643, "step": 5940 }, { "epoch": 0.47425560788696414, "grad_norm": 0.30120280750978196, "learning_rate": 1.1316048541675731e-05, "loss": 0.202, "step": 5941 }, { "epoch": 0.4743354354594077, "grad_norm": 0.3282460519112539, "learning_rate": 1.1313485525218324e-05, "loss": 0.1721, "step": 5942 }, { "epoch": 0.4744152630318512, "grad_norm": 0.2647791915430632, "learning_rate": 1.1310922420959607e-05, "loss": 0.122, "step": 5943 }, { "epoch": 0.47449509060429473, "grad_norm": 0.26253371553376437, "learning_rate": 1.1308359229070914e-05, "loss": 0.1858, "step": 5944 }, { "epoch": 0.47457491817673825, "grad_norm": 0.31911717394837485, "learning_rate": 1.1305795949723585e-05, "loss": 0.1588, "step": 5945 }, { "epoch": 0.47465474574918176, "grad_norm": 0.26345176572723306, "learning_rate": 1.1303232583088963e-05, "loss": 0.1821, "step": 5946 }, { "epoch": 0.4747345733216253, "grad_norm": 0.2846931825054471, "learning_rate": 1.13006691293384e-05, "loss": 0.1838, "step": 5947 }, { "epoch": 0.4748144008940688, "grad_norm": 0.2724759247275911, "learning_rate": 1.1298105588643256e-05, "loss": 0.1442, "step": 5948 }, { "epoch": 0.47489422846651236, "grad_norm": 0.32317475424170977, "learning_rate": 1.1295541961174887e-05, "loss": 0.1511, "step": 5949 }, { "epoch": 0.47497405603895587, "grad_norm": 0.3476838169001493, "learning_rate": 1.1292978247104664e-05, "loss": 0.1548, "step": 5950 }, { "epoch": 0.4750538836113994, "grad_norm": 0.2930430032048108, "learning_rate": 1.1290414446603965e-05, "loss": 0.1644, "step": 5951 }, { "epoch": 0.4751337111838429, "grad_norm": 0.3176756032007333, "learning_rate": 1.1287850559844164e-05, "loss": 0.2025, "step": 5952 }, { "epoch": 0.4752135387562864, "grad_norm": 0.31664373474303203, "learning_rate": 1.1285286586996658e-05, "loss": 0.1647, "step": 5953 }, { "epoch": 0.4752933663287299, "grad_norm": 0.3258976855950005, "learning_rate": 1.128272252823282e-05, "loss": 0.2065, "step": 5954 }, { "epoch": 0.47537319390117344, "grad_norm": 0.2839176592064438, "learning_rate": 1.1280158383724063e-05, "loss": 0.1562, "step": 5955 }, { "epoch": 0.475453021473617, "grad_norm": 0.25644516238107884, "learning_rate": 1.1277594153641782e-05, "loss": 0.1777, "step": 5956 }, { "epoch": 0.4755328490460605, "grad_norm": 0.3133400258390562, "learning_rate": 1.1275029838157388e-05, "loss": 0.1427, "step": 5957 }, { "epoch": 0.47561267661850404, "grad_norm": 0.32810534871543007, "learning_rate": 1.1272465437442295e-05, "loss": 0.1989, "step": 5958 }, { "epoch": 0.47569250419094755, "grad_norm": 0.2564905746949483, "learning_rate": 1.1269900951667925e-05, "loss": 0.1629, "step": 5959 }, { "epoch": 0.47577233176339107, "grad_norm": 0.27558959574976916, "learning_rate": 1.12673363810057e-05, "loss": 0.1496, "step": 5960 }, { "epoch": 0.4758521593358346, "grad_norm": 0.2688102445034336, "learning_rate": 1.1264771725627053e-05, "loss": 0.1857, "step": 5961 }, { "epoch": 0.4759319869082781, "grad_norm": 0.30453750589558126, "learning_rate": 1.1262206985703423e-05, "loss": 0.1481, "step": 5962 }, { "epoch": 0.47601181448072166, "grad_norm": 0.3352391259132039, "learning_rate": 1.1259642161406248e-05, "loss": 0.1723, "step": 5963 }, { "epoch": 0.4760916420531652, "grad_norm": 0.279696230104307, "learning_rate": 1.1257077252906982e-05, "loss": 0.1532, "step": 5964 }, { "epoch": 0.4761714696256087, "grad_norm": 0.2721805022137406, "learning_rate": 1.1254512260377077e-05, "loss": 0.1751, "step": 5965 }, { "epoch": 0.4762512971980522, "grad_norm": 0.2985333105431575, "learning_rate": 1.1251947183987992e-05, "loss": 0.1683, "step": 5966 }, { "epoch": 0.4763311247704957, "grad_norm": 0.32112935970463335, "learning_rate": 1.1249382023911188e-05, "loss": 0.1912, "step": 5967 }, { "epoch": 0.47641095234293923, "grad_norm": 0.27876089109740804, "learning_rate": 1.1246816780318148e-05, "loss": 0.1774, "step": 5968 }, { "epoch": 0.4764907799153828, "grad_norm": 0.2711406051444543, "learning_rate": 1.1244251453380336e-05, "loss": 0.1447, "step": 5969 }, { "epoch": 0.4765706074878263, "grad_norm": 0.3031839844601435, "learning_rate": 1.1241686043269238e-05, "loss": 0.2012, "step": 5970 }, { "epoch": 0.47665043506026983, "grad_norm": 0.27832046912018304, "learning_rate": 1.123912055015634e-05, "loss": 0.2314, "step": 5971 }, { "epoch": 0.47673026263271334, "grad_norm": 0.2696037175769014, "learning_rate": 1.123655497421314e-05, "loss": 0.1583, "step": 5972 }, { "epoch": 0.47681009020515686, "grad_norm": 0.2908979382729464, "learning_rate": 1.1233989315611132e-05, "loss": 0.2011, "step": 5973 }, { "epoch": 0.47688991777760037, "grad_norm": 0.2717153485974649, "learning_rate": 1.123142357452182e-05, "loss": 0.134, "step": 5974 }, { "epoch": 0.4769697453500439, "grad_norm": 0.35926691858650756, "learning_rate": 1.1228857751116721e-05, "loss": 0.1518, "step": 5975 }, { "epoch": 0.47704957292248745, "grad_norm": 0.2904865162504807, "learning_rate": 1.1226291845567338e-05, "loss": 0.2148, "step": 5976 }, { "epoch": 0.47712940049493097, "grad_norm": 0.30322564253021733, "learning_rate": 1.1223725858045205e-05, "loss": 0.1663, "step": 5977 }, { "epoch": 0.4772092280673745, "grad_norm": 0.26967146263563946, "learning_rate": 1.1221159788721833e-05, "loss": 0.1782, "step": 5978 }, { "epoch": 0.477289055639818, "grad_norm": 0.3261234223036553, "learning_rate": 1.1218593637768769e-05, "loss": 0.1697, "step": 5979 }, { "epoch": 0.4773688832122615, "grad_norm": 0.3597916670304269, "learning_rate": 1.1216027405357539e-05, "loss": 0.1598, "step": 5980 }, { "epoch": 0.477448710784705, "grad_norm": 0.34697942257160996, "learning_rate": 1.121346109165969e-05, "loss": 0.1945, "step": 5981 }, { "epoch": 0.47752853835714854, "grad_norm": 0.28400801122899383, "learning_rate": 1.1210894696846769e-05, "loss": 0.204, "step": 5982 }, { "epoch": 0.4776083659295921, "grad_norm": 0.2647445486112228, "learning_rate": 1.120832822109033e-05, "loss": 0.1944, "step": 5983 }, { "epoch": 0.4776881935020356, "grad_norm": 0.38294739519490345, "learning_rate": 1.1205761664561932e-05, "loss": 0.1974, "step": 5984 }, { "epoch": 0.47776802107447913, "grad_norm": 0.40929102966376135, "learning_rate": 1.1203195027433135e-05, "loss": 0.1833, "step": 5985 }, { "epoch": 0.47784784864692265, "grad_norm": 0.33501705259070824, "learning_rate": 1.1200628309875517e-05, "loss": 0.187, "step": 5986 }, { "epoch": 0.47792767621936616, "grad_norm": 0.33464665274876015, "learning_rate": 1.1198061512060643e-05, "loss": 0.1646, "step": 5987 }, { "epoch": 0.4780075037918097, "grad_norm": 0.2815726967672839, "learning_rate": 1.1195494634160102e-05, "loss": 0.1858, "step": 5988 }, { "epoch": 0.4780873313642532, "grad_norm": 0.3281755061019444, "learning_rate": 1.1192927676345473e-05, "loss": 0.2046, "step": 5989 }, { "epoch": 0.47816715893669676, "grad_norm": 0.3608182874950418, "learning_rate": 1.1190360638788355e-05, "loss": 0.1616, "step": 5990 }, { "epoch": 0.47824698650914027, "grad_norm": 0.32330996216463825, "learning_rate": 1.1187793521660332e-05, "loss": 0.1508, "step": 5991 }, { "epoch": 0.4783268140815838, "grad_norm": 0.3057728447103085, "learning_rate": 1.1185226325133018e-05, "loss": 0.1783, "step": 5992 }, { "epoch": 0.4784066416540273, "grad_norm": 0.26630801723062275, "learning_rate": 1.1182659049378011e-05, "loss": 0.1337, "step": 5993 }, { "epoch": 0.4784864692264708, "grad_norm": 0.2991556745711648, "learning_rate": 1.118009169456693e-05, "loss": 0.1623, "step": 5994 }, { "epoch": 0.4785662967989143, "grad_norm": 0.3225203918292723, "learning_rate": 1.1177524260871386e-05, "loss": 0.1419, "step": 5995 }, { "epoch": 0.47864612437135784, "grad_norm": 0.33904943951644834, "learning_rate": 1.1174956748463007e-05, "loss": 0.1702, "step": 5996 }, { "epoch": 0.4787259519438014, "grad_norm": 0.3279446820904886, "learning_rate": 1.1172389157513418e-05, "loss": 0.1981, "step": 5997 }, { "epoch": 0.4788057795162449, "grad_norm": 0.3433856217208625, "learning_rate": 1.1169821488194255e-05, "loss": 0.1882, "step": 5998 }, { "epoch": 0.47888560708868844, "grad_norm": 0.3543810134264005, "learning_rate": 1.1167253740677155e-05, "loss": 0.1465, "step": 5999 }, { "epoch": 0.47896543466113195, "grad_norm": 0.28350896253092916, "learning_rate": 1.116468591513376e-05, "loss": 0.181, "step": 6000 }, { "epoch": 0.47904526223357546, "grad_norm": 0.3675039408791513, "learning_rate": 1.1162118011735723e-05, "loss": 0.177, "step": 6001 }, { "epoch": 0.479125089806019, "grad_norm": 0.3721684266144047, "learning_rate": 1.1159550030654694e-05, "loss": 0.1742, "step": 6002 }, { "epoch": 0.47920491737846255, "grad_norm": 0.2993423130534559, "learning_rate": 1.1156981972062335e-05, "loss": 0.12, "step": 6003 }, { "epoch": 0.47928474495090606, "grad_norm": 0.27152628363586806, "learning_rate": 1.1154413836130308e-05, "loss": 0.1804, "step": 6004 }, { "epoch": 0.4793645725233496, "grad_norm": 0.3288289108788947, "learning_rate": 1.1151845623030289e-05, "loss": 0.1825, "step": 6005 }, { "epoch": 0.4794444000957931, "grad_norm": 0.3011939457861087, "learning_rate": 1.1149277332933946e-05, "loss": 0.1281, "step": 6006 }, { "epoch": 0.4795242276682366, "grad_norm": 0.34895182922973533, "learning_rate": 1.114670896601296e-05, "loss": 0.1997, "step": 6007 }, { "epoch": 0.4796040552406801, "grad_norm": 0.28397037841582096, "learning_rate": 1.114414052243902e-05, "loss": 0.1761, "step": 6008 }, { "epoch": 0.47968388281312363, "grad_norm": 0.33371994566221386, "learning_rate": 1.1141572002383812e-05, "loss": 0.151, "step": 6009 }, { "epoch": 0.4797637103855672, "grad_norm": 0.26130984249107203, "learning_rate": 1.1139003406019036e-05, "loss": 0.142, "step": 6010 }, { "epoch": 0.4798435379580107, "grad_norm": 0.2729611952071738, "learning_rate": 1.1136434733516385e-05, "loss": 0.1697, "step": 6011 }, { "epoch": 0.4799233655304542, "grad_norm": 0.2700943424246171, "learning_rate": 1.1133865985047575e-05, "loss": 0.1167, "step": 6012 }, { "epoch": 0.48000319310289774, "grad_norm": 0.2789996727052782, "learning_rate": 1.1131297160784309e-05, "loss": 0.1581, "step": 6013 }, { "epoch": 0.48008302067534125, "grad_norm": 0.2918988694801814, "learning_rate": 1.1128728260898305e-05, "loss": 0.1913, "step": 6014 }, { "epoch": 0.48016284824778477, "grad_norm": 0.2837944318902004, "learning_rate": 1.1126159285561283e-05, "loss": 0.2024, "step": 6015 }, { "epoch": 0.4802426758202283, "grad_norm": 0.40910566031327367, "learning_rate": 1.1123590234944968e-05, "loss": 0.1567, "step": 6016 }, { "epoch": 0.48032250339267185, "grad_norm": 0.31812398587861845, "learning_rate": 1.1121021109221094e-05, "loss": 0.1873, "step": 6017 }, { "epoch": 0.48040233096511537, "grad_norm": 0.3071881633674547, "learning_rate": 1.1118451908561391e-05, "loss": 0.2098, "step": 6018 }, { "epoch": 0.4804821585375589, "grad_norm": 0.28640176248099164, "learning_rate": 1.1115882633137611e-05, "loss": 0.1926, "step": 6019 }, { "epoch": 0.4805619861100024, "grad_norm": 0.2924156228354998, "learning_rate": 1.1113313283121489e-05, "loss": 0.1716, "step": 6020 }, { "epoch": 0.4806418136824459, "grad_norm": 0.2857232167974316, "learning_rate": 1.1110743858684783e-05, "loss": 0.1916, "step": 6021 }, { "epoch": 0.4807216412548894, "grad_norm": 0.3084088522911161, "learning_rate": 1.1108174359999242e-05, "loss": 0.1434, "step": 6022 }, { "epoch": 0.48080146882733293, "grad_norm": 0.31115637317816736, "learning_rate": 1.1105604787236634e-05, "loss": 0.1506, "step": 6023 }, { "epoch": 0.4808812963997765, "grad_norm": 0.2946663391068005, "learning_rate": 1.1103035140568718e-05, "loss": 0.1902, "step": 6024 }, { "epoch": 0.48096112397222, "grad_norm": 0.2934719869547374, "learning_rate": 1.110046542016727e-05, "loss": 0.1739, "step": 6025 }, { "epoch": 0.48104095154466353, "grad_norm": 0.32632764393287667, "learning_rate": 1.1097895626204062e-05, "loss": 0.1992, "step": 6026 }, { "epoch": 0.48112077911710704, "grad_norm": 0.2988518181719213, "learning_rate": 1.1095325758850878e-05, "loss": 0.1899, "step": 6027 }, { "epoch": 0.48120060668955056, "grad_norm": 0.27417905561632894, "learning_rate": 1.1092755818279501e-05, "loss": 0.1443, "step": 6028 }, { "epoch": 0.4812804342619941, "grad_norm": 0.298066319803716, "learning_rate": 1.1090185804661722e-05, "loss": 0.1593, "step": 6029 }, { "epoch": 0.48136026183443764, "grad_norm": 0.35389930151865884, "learning_rate": 1.1087615718169337e-05, "loss": 0.1762, "step": 6030 }, { "epoch": 0.48144008940688116, "grad_norm": 0.31814882214302553, "learning_rate": 1.1085045558974143e-05, "loss": 0.1874, "step": 6031 }, { "epoch": 0.48151991697932467, "grad_norm": 0.2914321372683505, "learning_rate": 1.108247532724795e-05, "loss": 0.1607, "step": 6032 }, { "epoch": 0.4815997445517682, "grad_norm": 0.34265207826374516, "learning_rate": 1.1079905023162566e-05, "loss": 0.1676, "step": 6033 }, { "epoch": 0.4816795721242117, "grad_norm": 0.3325574924426666, "learning_rate": 1.1077334646889803e-05, "loss": 0.1635, "step": 6034 }, { "epoch": 0.4817593996966552, "grad_norm": 0.27395668093179076, "learning_rate": 1.1074764198601482e-05, "loss": 0.1727, "step": 6035 }, { "epoch": 0.4818392272690987, "grad_norm": 0.30025701723972686, "learning_rate": 1.107219367846943e-05, "loss": 0.15, "step": 6036 }, { "epoch": 0.4819190548415423, "grad_norm": 0.3666848601773692, "learning_rate": 1.1069623086665471e-05, "loss": 0.2152, "step": 6037 }, { "epoch": 0.4819988824139858, "grad_norm": 0.30079599846923416, "learning_rate": 1.1067052423361444e-05, "loss": 0.1667, "step": 6038 }, { "epoch": 0.4820787099864293, "grad_norm": 0.30930162725335064, "learning_rate": 1.1064481688729181e-05, "loss": 0.1392, "step": 6039 }, { "epoch": 0.48215853755887284, "grad_norm": 0.30900872165856297, "learning_rate": 1.1061910882940535e-05, "loss": 0.1704, "step": 6040 }, { "epoch": 0.48223836513131635, "grad_norm": 0.29791531689040995, "learning_rate": 1.1059340006167342e-05, "loss": 0.1539, "step": 6041 }, { "epoch": 0.48231819270375986, "grad_norm": 0.28892887826520913, "learning_rate": 1.1056769058581469e-05, "loss": 0.1892, "step": 6042 }, { "epoch": 0.4823980202762034, "grad_norm": 0.3113037841632837, "learning_rate": 1.1054198040354765e-05, "loss": 0.1796, "step": 6043 }, { "epoch": 0.48247784784864695, "grad_norm": 0.32244825405295946, "learning_rate": 1.1051626951659091e-05, "loss": 0.1909, "step": 6044 }, { "epoch": 0.48255767542109046, "grad_norm": 0.2826827590437772, "learning_rate": 1.104905579266632e-05, "loss": 0.1724, "step": 6045 }, { "epoch": 0.482637502993534, "grad_norm": 0.29143139311673677, "learning_rate": 1.104648456354832e-05, "loss": 0.1461, "step": 6046 }, { "epoch": 0.4827173305659775, "grad_norm": 0.29791657986568854, "learning_rate": 1.1043913264476966e-05, "loss": 0.1593, "step": 6047 }, { "epoch": 0.482797158138421, "grad_norm": 0.28526388978449324, "learning_rate": 1.1041341895624142e-05, "loss": 0.1347, "step": 6048 }, { "epoch": 0.4828769857108645, "grad_norm": 0.2745281608252186, "learning_rate": 1.1038770457161737e-05, "loss": 0.1888, "step": 6049 }, { "epoch": 0.48295681328330803, "grad_norm": 0.291338317825532, "learning_rate": 1.1036198949261636e-05, "loss": 0.2179, "step": 6050 }, { "epoch": 0.4830366408557516, "grad_norm": 0.2905205941321316, "learning_rate": 1.1033627372095732e-05, "loss": 0.1791, "step": 6051 }, { "epoch": 0.4831164684281951, "grad_norm": 0.3000307324041343, "learning_rate": 1.1031055725835933e-05, "loss": 0.2198, "step": 6052 }, { "epoch": 0.4831962960006386, "grad_norm": 0.3091654590456262, "learning_rate": 1.1028484010654137e-05, "loss": 0.2156, "step": 6053 }, { "epoch": 0.48327612357308214, "grad_norm": 0.29472993904582523, "learning_rate": 1.1025912226722258e-05, "loss": 0.2002, "step": 6054 }, { "epoch": 0.48335595114552565, "grad_norm": 0.3413429630096129, "learning_rate": 1.1023340374212203e-05, "loss": 0.1589, "step": 6055 }, { "epoch": 0.48343577871796917, "grad_norm": 0.29844309304468897, "learning_rate": 1.1020768453295896e-05, "loss": 0.1934, "step": 6056 }, { "epoch": 0.48351560629041274, "grad_norm": 0.3026737718163717, "learning_rate": 1.1018196464145259e-05, "loss": 0.1407, "step": 6057 }, { "epoch": 0.48359543386285625, "grad_norm": 0.2832786362725117, "learning_rate": 1.1015624406932215e-05, "loss": 0.1398, "step": 6058 }, { "epoch": 0.48367526143529976, "grad_norm": 0.2910348379298857, "learning_rate": 1.10130522818287e-05, "loss": 0.1573, "step": 6059 }, { "epoch": 0.4837550890077433, "grad_norm": 0.3385023014049714, "learning_rate": 1.1010480089006648e-05, "loss": 0.2316, "step": 6060 }, { "epoch": 0.4838349165801868, "grad_norm": 0.33019199061937554, "learning_rate": 1.1007907828638e-05, "loss": 0.1293, "step": 6061 }, { "epoch": 0.4839147441526303, "grad_norm": 0.32145016217635664, "learning_rate": 1.1005335500894706e-05, "loss": 0.1523, "step": 6062 }, { "epoch": 0.4839945717250738, "grad_norm": 0.30400337563347946, "learning_rate": 1.1002763105948708e-05, "loss": 0.1463, "step": 6063 }, { "epoch": 0.4840743992975174, "grad_norm": 0.2811477674935335, "learning_rate": 1.1000190643971965e-05, "loss": 0.1341, "step": 6064 }, { "epoch": 0.4841542268699609, "grad_norm": 0.27889102324178905, "learning_rate": 1.0997618115136442e-05, "loss": 0.1422, "step": 6065 }, { "epoch": 0.4842340544424044, "grad_norm": 0.3148961402445088, "learning_rate": 1.099504551961409e-05, "loss": 0.1672, "step": 6066 }, { "epoch": 0.48431388201484793, "grad_norm": 0.32424299413637614, "learning_rate": 1.0992472857576883e-05, "loss": 0.2046, "step": 6067 }, { "epoch": 0.48439370958729144, "grad_norm": 0.3052605863683079, "learning_rate": 1.0989900129196793e-05, "loss": 0.1667, "step": 6068 }, { "epoch": 0.48447353715973496, "grad_norm": 0.3268689193298762, "learning_rate": 1.0987327334645802e-05, "loss": 0.1958, "step": 6069 }, { "epoch": 0.48455336473217847, "grad_norm": 0.31445761024377683, "learning_rate": 1.0984754474095878e-05, "loss": 0.1567, "step": 6070 }, { "epoch": 0.48463319230462204, "grad_norm": 0.4337857378339768, "learning_rate": 1.098218154771902e-05, "loss": 0.1547, "step": 6071 }, { "epoch": 0.48471301987706555, "grad_norm": 0.29171817954733675, "learning_rate": 1.0979608555687211e-05, "loss": 0.138, "step": 6072 }, { "epoch": 0.48479284744950907, "grad_norm": 0.286316790939187, "learning_rate": 1.0977035498172446e-05, "loss": 0.1629, "step": 6073 }, { "epoch": 0.4848726750219526, "grad_norm": 0.35252247084130234, "learning_rate": 1.0974462375346725e-05, "loss": 0.1797, "step": 6074 }, { "epoch": 0.4849525025943961, "grad_norm": 0.23984554703546865, "learning_rate": 1.0971889187382052e-05, "loss": 0.1766, "step": 6075 }, { "epoch": 0.4850323301668396, "grad_norm": 0.2907906247146029, "learning_rate": 1.0969315934450433e-05, "loss": 0.14, "step": 6076 }, { "epoch": 0.4851121577392831, "grad_norm": 0.3004625540023362, "learning_rate": 1.0966742616723875e-05, "loss": 0.1795, "step": 6077 }, { "epoch": 0.4851919853117267, "grad_norm": 0.3038705511476486, "learning_rate": 1.0964169234374404e-05, "loss": 0.1976, "step": 6078 }, { "epoch": 0.4852718128841702, "grad_norm": 0.29783625234055777, "learning_rate": 1.0961595787574035e-05, "loss": 0.1719, "step": 6079 }, { "epoch": 0.4853516404566137, "grad_norm": 0.29905801407288607, "learning_rate": 1.0959022276494793e-05, "loss": 0.1653, "step": 6080 }, { "epoch": 0.48543146802905723, "grad_norm": 0.30523928287048785, "learning_rate": 1.0956448701308706e-05, "loss": 0.217, "step": 6081 }, { "epoch": 0.48551129560150075, "grad_norm": 0.2504434331865499, "learning_rate": 1.0953875062187809e-05, "loss": 0.1601, "step": 6082 }, { "epoch": 0.48559112317394426, "grad_norm": 0.2709500378533387, "learning_rate": 1.0951301359304137e-05, "loss": 0.1737, "step": 6083 }, { "epoch": 0.4856709507463878, "grad_norm": 0.2709638431144356, "learning_rate": 1.0948727592829735e-05, "loss": 0.1976, "step": 6084 }, { "epoch": 0.48575077831883134, "grad_norm": 0.2642467931012883, "learning_rate": 1.0946153762936647e-05, "loss": 0.1782, "step": 6085 }, { "epoch": 0.48583060589127486, "grad_norm": 0.2839173518794333, "learning_rate": 1.0943579869796928e-05, "loss": 0.1509, "step": 6086 }, { "epoch": 0.48591043346371837, "grad_norm": 0.2958507698642312, "learning_rate": 1.0941005913582625e-05, "loss": 0.1378, "step": 6087 }, { "epoch": 0.4859902610361619, "grad_norm": 0.31016206180260314, "learning_rate": 1.0938431894465801e-05, "loss": 0.2134, "step": 6088 }, { "epoch": 0.4860700886086054, "grad_norm": 0.25263148501946386, "learning_rate": 1.093585781261852e-05, "loss": 0.1483, "step": 6089 }, { "epoch": 0.4861499161810489, "grad_norm": 0.29932911970699394, "learning_rate": 1.0933283668212846e-05, "loss": 0.1459, "step": 6090 }, { "epoch": 0.4862297437534925, "grad_norm": 0.30928012552726336, "learning_rate": 1.0930709461420854e-05, "loss": 0.1641, "step": 6091 }, { "epoch": 0.486309571325936, "grad_norm": 0.31263641329321296, "learning_rate": 1.0928135192414617e-05, "loss": 0.178, "step": 6092 }, { "epoch": 0.4863893988983795, "grad_norm": 0.2838040484371008, "learning_rate": 1.0925560861366215e-05, "loss": 0.1541, "step": 6093 }, { "epoch": 0.486469226470823, "grad_norm": 0.2864056518802234, "learning_rate": 1.0922986468447733e-05, "loss": 0.2121, "step": 6094 }, { "epoch": 0.48654905404326654, "grad_norm": 0.27272382211260254, "learning_rate": 1.092041201383126e-05, "loss": 0.1535, "step": 6095 }, { "epoch": 0.48662888161571005, "grad_norm": 0.3235674823745205, "learning_rate": 1.0917837497688884e-05, "loss": 0.1262, "step": 6096 }, { "epoch": 0.48670870918815357, "grad_norm": 0.3243401995503725, "learning_rate": 1.0915262920192702e-05, "loss": 0.1428, "step": 6097 }, { "epoch": 0.48678853676059713, "grad_norm": 0.266444421310414, "learning_rate": 1.091268828151482e-05, "loss": 0.1309, "step": 6098 }, { "epoch": 0.48686836433304065, "grad_norm": 0.2575419785385897, "learning_rate": 1.0910113581827334e-05, "loss": 0.1947, "step": 6099 }, { "epoch": 0.48694819190548416, "grad_norm": 0.2839019916049785, "learning_rate": 1.0907538821302359e-05, "loss": 0.2085, "step": 6100 }, { "epoch": 0.4870280194779277, "grad_norm": 0.3371336078350328, "learning_rate": 1.0904964000112004e-05, "loss": 0.1963, "step": 6101 }, { "epoch": 0.4871078470503712, "grad_norm": 0.24536840216743253, "learning_rate": 1.0902389118428392e-05, "loss": 0.1808, "step": 6102 }, { "epoch": 0.4871876746228147, "grad_norm": 0.30828442493556407, "learning_rate": 1.0899814176423633e-05, "loss": 0.1463, "step": 6103 }, { "epoch": 0.4872675021952582, "grad_norm": 0.2815802897790047, "learning_rate": 1.0897239174269861e-05, "loss": 0.2091, "step": 6104 }, { "epoch": 0.4873473297677018, "grad_norm": 0.37410937535261873, "learning_rate": 1.0894664112139197e-05, "loss": 0.1528, "step": 6105 }, { "epoch": 0.4874271573401453, "grad_norm": 0.32049665852316406, "learning_rate": 1.089208899020378e-05, "loss": 0.1344, "step": 6106 }, { "epoch": 0.4875069849125888, "grad_norm": 0.3160146725592781, "learning_rate": 1.0889513808635743e-05, "loss": 0.1495, "step": 6107 }, { "epoch": 0.48758681248503233, "grad_norm": 0.283189439165211, "learning_rate": 1.088693856760723e-05, "loss": 0.2231, "step": 6108 }, { "epoch": 0.48766664005747584, "grad_norm": 0.29015921976827713, "learning_rate": 1.0884363267290383e-05, "loss": 0.1425, "step": 6109 }, { "epoch": 0.48774646762991936, "grad_norm": 0.3162359266005007, "learning_rate": 1.088178790785735e-05, "loss": 0.1746, "step": 6110 }, { "epoch": 0.48782629520236287, "grad_norm": 0.30749272333566197, "learning_rate": 1.0879212489480286e-05, "loss": 0.226, "step": 6111 }, { "epoch": 0.48790612277480644, "grad_norm": 0.3115319238912138, "learning_rate": 1.0876637012331346e-05, "loss": 0.1788, "step": 6112 }, { "epoch": 0.48798595034724995, "grad_norm": 0.3158653171427545, "learning_rate": 1.0874061476582693e-05, "loss": 0.1734, "step": 6113 }, { "epoch": 0.48806577791969347, "grad_norm": 0.2844862929926912, "learning_rate": 1.0871485882406485e-05, "loss": 0.1938, "step": 6114 }, { "epoch": 0.488145605492137, "grad_norm": 0.28850462606615984, "learning_rate": 1.0868910229974898e-05, "loss": 0.1833, "step": 6115 }, { "epoch": 0.4882254330645805, "grad_norm": 0.2613227069839085, "learning_rate": 1.08663345194601e-05, "loss": 0.1744, "step": 6116 }, { "epoch": 0.488305260637024, "grad_norm": 0.31201685230941906, "learning_rate": 1.086375875103427e-05, "loss": 0.1601, "step": 6117 }, { "epoch": 0.4883850882094676, "grad_norm": 0.2922025311440932, "learning_rate": 1.0861182924869584e-05, "loss": 0.19, "step": 6118 }, { "epoch": 0.4884649157819111, "grad_norm": 0.2931375264701743, "learning_rate": 1.0858607041138226e-05, "loss": 0.1945, "step": 6119 }, { "epoch": 0.4885447433543546, "grad_norm": 0.23839046252460977, "learning_rate": 1.0856031100012387e-05, "loss": 0.1542, "step": 6120 }, { "epoch": 0.4886245709267981, "grad_norm": 0.3550908677549504, "learning_rate": 1.0853455101664254e-05, "loss": 0.143, "step": 6121 }, { "epoch": 0.48870439849924163, "grad_norm": 0.31487080824425123, "learning_rate": 1.085087904626603e-05, "loss": 0.1753, "step": 6122 }, { "epoch": 0.48878422607168515, "grad_norm": 0.2575976996134595, "learning_rate": 1.0848302933989905e-05, "loss": 0.1754, "step": 6123 }, { "epoch": 0.48886405364412866, "grad_norm": 0.34495139570489886, "learning_rate": 1.0845726765008089e-05, "loss": 0.168, "step": 6124 }, { "epoch": 0.48894388121657223, "grad_norm": 0.3195904322885316, "learning_rate": 1.0843150539492782e-05, "loss": 0.1737, "step": 6125 }, { "epoch": 0.48902370878901574, "grad_norm": 0.3081499978158786, "learning_rate": 1.08405742576162e-05, "loss": 0.196, "step": 6126 }, { "epoch": 0.48910353636145926, "grad_norm": 0.2821257967786784, "learning_rate": 1.0837997919550555e-05, "loss": 0.2127, "step": 6127 }, { "epoch": 0.48918336393390277, "grad_norm": 0.2837272887986646, "learning_rate": 1.0835421525468067e-05, "loss": 0.1522, "step": 6128 }, { "epoch": 0.4892631915063463, "grad_norm": 0.27284749630748373, "learning_rate": 1.0832845075540952e-05, "loss": 0.1451, "step": 6129 }, { "epoch": 0.4893430190787898, "grad_norm": 0.32825133869105994, "learning_rate": 1.0830268569941444e-05, "loss": 0.1519, "step": 6130 }, { "epoch": 0.4894228466512333, "grad_norm": 0.2962447695778957, "learning_rate": 1.0827692008841766e-05, "loss": 0.1606, "step": 6131 }, { "epoch": 0.4895026742236769, "grad_norm": 0.32242002359351285, "learning_rate": 1.0825115392414154e-05, "loss": 0.2047, "step": 6132 }, { "epoch": 0.4895825017961204, "grad_norm": 0.27504354453165386, "learning_rate": 1.0822538720830841e-05, "loss": 0.1273, "step": 6133 }, { "epoch": 0.4896623293685639, "grad_norm": 0.31174466156036923, "learning_rate": 1.0819961994264072e-05, "loss": 0.1919, "step": 6134 }, { "epoch": 0.4897421569410074, "grad_norm": 0.2879803307341932, "learning_rate": 1.0817385212886086e-05, "loss": 0.1526, "step": 6135 }, { "epoch": 0.48982198451345094, "grad_norm": 0.2549407546397251, "learning_rate": 1.0814808376869135e-05, "loss": 0.1845, "step": 6136 }, { "epoch": 0.48990181208589445, "grad_norm": 0.32463371289723175, "learning_rate": 1.0812231486385469e-05, "loss": 0.1735, "step": 6137 }, { "epoch": 0.48998163965833796, "grad_norm": 0.2837649209773145, "learning_rate": 1.0809654541607342e-05, "loss": 0.1657, "step": 6138 }, { "epoch": 0.49006146723078153, "grad_norm": 0.2973904627898988, "learning_rate": 1.0807077542707012e-05, "loss": 0.1548, "step": 6139 }, { "epoch": 0.49014129480322505, "grad_norm": 0.2974632728799973, "learning_rate": 1.0804500489856744e-05, "loss": 0.1674, "step": 6140 }, { "epoch": 0.49022112237566856, "grad_norm": 0.32026725700918707, "learning_rate": 1.08019233832288e-05, "loss": 0.1697, "step": 6141 }, { "epoch": 0.4903009499481121, "grad_norm": 0.2531951088776388, "learning_rate": 1.0799346222995453e-05, "loss": 0.1971, "step": 6142 }, { "epoch": 0.4903807775205556, "grad_norm": 0.29209024521168037, "learning_rate": 1.0796769009328973e-05, "loss": 0.1831, "step": 6143 }, { "epoch": 0.4904606050929991, "grad_norm": 0.3384317489947669, "learning_rate": 1.079419174240164e-05, "loss": 0.1623, "step": 6144 }, { "epoch": 0.4905404326654426, "grad_norm": 0.34230266812451704, "learning_rate": 1.079161442238573e-05, "loss": 0.1604, "step": 6145 }, { "epoch": 0.4906202602378862, "grad_norm": 0.3060467737130536, "learning_rate": 1.078903704945353e-05, "loss": 0.1667, "step": 6146 }, { "epoch": 0.4907000878103297, "grad_norm": 0.32415273404057676, "learning_rate": 1.0786459623777326e-05, "loss": 0.1658, "step": 6147 }, { "epoch": 0.4907799153827732, "grad_norm": 0.2788051953553932, "learning_rate": 1.0783882145529408e-05, "loss": 0.2092, "step": 6148 }, { "epoch": 0.4908597429552167, "grad_norm": 0.260608401009049, "learning_rate": 1.078130461488207e-05, "loss": 0.1839, "step": 6149 }, { "epoch": 0.49093957052766024, "grad_norm": 0.2787934892607409, "learning_rate": 1.077872703200761e-05, "loss": 0.1762, "step": 6150 }, { "epoch": 0.49101939810010375, "grad_norm": 0.2930210269383243, "learning_rate": 1.077614939707833e-05, "loss": 0.1474, "step": 6151 }, { "epoch": 0.4910992256725473, "grad_norm": 0.2761199396572659, "learning_rate": 1.0773571710266536e-05, "loss": 0.1303, "step": 6152 }, { "epoch": 0.49117905324499084, "grad_norm": 0.2790530636616874, "learning_rate": 1.0770993971744532e-05, "loss": 0.2075, "step": 6153 }, { "epoch": 0.49125888081743435, "grad_norm": 0.2764066136244313, "learning_rate": 1.0768416181684634e-05, "loss": 0.1914, "step": 6154 }, { "epoch": 0.49133870838987787, "grad_norm": 0.34506408510350367, "learning_rate": 1.0765838340259156e-05, "loss": 0.1634, "step": 6155 }, { "epoch": 0.4914185359623214, "grad_norm": 0.32364643909261404, "learning_rate": 1.076326044764041e-05, "loss": 0.1512, "step": 6156 }, { "epoch": 0.4914983635347649, "grad_norm": 0.30169925490778277, "learning_rate": 1.076068250400073e-05, "loss": 0.1618, "step": 6157 }, { "epoch": 0.4915781911072084, "grad_norm": 0.2683685136583876, "learning_rate": 1.0758104509512433e-05, "loss": 0.1335, "step": 6158 }, { "epoch": 0.491658018679652, "grad_norm": 0.28471928996858253, "learning_rate": 1.075552646434785e-05, "loss": 0.2434, "step": 6159 }, { "epoch": 0.4917378462520955, "grad_norm": 0.298591143737453, "learning_rate": 1.075294836867931e-05, "loss": 0.1965, "step": 6160 }, { "epoch": 0.491817673824539, "grad_norm": 0.2812150280507361, "learning_rate": 1.0750370222679155e-05, "loss": 0.1496, "step": 6161 }, { "epoch": 0.4918975013969825, "grad_norm": 0.2821501258802802, "learning_rate": 1.0747792026519721e-05, "loss": 0.149, "step": 6162 }, { "epoch": 0.49197732896942603, "grad_norm": 0.265622780430914, "learning_rate": 1.0745213780373346e-05, "loss": 0.1768, "step": 6163 }, { "epoch": 0.49205715654186954, "grad_norm": 0.34183426822501856, "learning_rate": 1.074263548441238e-05, "loss": 0.1473, "step": 6164 }, { "epoch": 0.49213698411431306, "grad_norm": 0.28855862031247115, "learning_rate": 1.074005713880917e-05, "loss": 0.1221, "step": 6165 }, { "epoch": 0.49221681168675663, "grad_norm": 0.27153050728723443, "learning_rate": 1.073747874373607e-05, "loss": 0.1632, "step": 6166 }, { "epoch": 0.49229663925920014, "grad_norm": 0.3169760216807522, "learning_rate": 1.0734900299365434e-05, "loss": 0.2199, "step": 6167 }, { "epoch": 0.49237646683164366, "grad_norm": 0.2884818522122281, "learning_rate": 1.0732321805869622e-05, "loss": 0.1811, "step": 6168 }, { "epoch": 0.49245629440408717, "grad_norm": 0.30854265490130484, "learning_rate": 1.0729743263420995e-05, "loss": 0.1773, "step": 6169 }, { "epoch": 0.4925361219765307, "grad_norm": 0.2624002360591871, "learning_rate": 1.0727164672191921e-05, "loss": 0.199, "step": 6170 }, { "epoch": 0.4926159495489742, "grad_norm": 0.2752652616469255, "learning_rate": 1.0724586032354762e-05, "loss": 0.1724, "step": 6171 }, { "epoch": 0.4926957771214177, "grad_norm": 0.27871098109805215, "learning_rate": 1.0722007344081897e-05, "loss": 0.1753, "step": 6172 }, { "epoch": 0.4927756046938613, "grad_norm": 0.24978709016967024, "learning_rate": 1.0719428607545697e-05, "loss": 0.1448, "step": 6173 }, { "epoch": 0.4928554322663048, "grad_norm": 0.2834077222497021, "learning_rate": 1.0716849822918542e-05, "loss": 0.1862, "step": 6174 }, { "epoch": 0.4929352598387483, "grad_norm": 0.2834574601640776, "learning_rate": 1.0714270990372814e-05, "loss": 0.1739, "step": 6175 }, { "epoch": 0.4930150874111918, "grad_norm": 0.30007846067402155, "learning_rate": 1.0711692110080897e-05, "loss": 0.1307, "step": 6176 }, { "epoch": 0.49309491498363534, "grad_norm": 0.27798392728623345, "learning_rate": 1.070911318221518e-05, "loss": 0.1625, "step": 6177 }, { "epoch": 0.49317474255607885, "grad_norm": 0.34492481917134216, "learning_rate": 1.070653420694805e-05, "loss": 0.1612, "step": 6178 }, { "epoch": 0.4932545701285224, "grad_norm": 0.29742318639578547, "learning_rate": 1.0703955184451907e-05, "loss": 0.1657, "step": 6179 }, { "epoch": 0.49333439770096593, "grad_norm": 0.30527560376583035, "learning_rate": 1.0701376114899142e-05, "loss": 0.1787, "step": 6180 }, { "epoch": 0.49341422527340945, "grad_norm": 0.2920320898623559, "learning_rate": 1.0698796998462164e-05, "loss": 0.1962, "step": 6181 }, { "epoch": 0.49349405284585296, "grad_norm": 0.34103933297198036, "learning_rate": 1.069621783531337e-05, "loss": 0.1408, "step": 6182 }, { "epoch": 0.4935738804182965, "grad_norm": 0.2936277898828191, "learning_rate": 1.0693638625625168e-05, "loss": 0.1581, "step": 6183 }, { "epoch": 0.49365370799074, "grad_norm": 0.316305362073909, "learning_rate": 1.0691059369569971e-05, "loss": 0.1645, "step": 6184 }, { "epoch": 0.4937335355631835, "grad_norm": 0.28144891784931136, "learning_rate": 1.0688480067320188e-05, "loss": 0.1926, "step": 6185 }, { "epoch": 0.49381336313562707, "grad_norm": 0.26885903231237784, "learning_rate": 1.068590071904824e-05, "loss": 0.1272, "step": 6186 }, { "epoch": 0.4938931907080706, "grad_norm": 0.3222802288731123, "learning_rate": 1.0683321324926539e-05, "loss": 0.2239, "step": 6187 }, { "epoch": 0.4939730182805141, "grad_norm": 0.3030040319965294, "learning_rate": 1.0680741885127515e-05, "loss": 0.1804, "step": 6188 }, { "epoch": 0.4940528458529576, "grad_norm": 0.3040641271698856, "learning_rate": 1.0678162399823586e-05, "loss": 0.141, "step": 6189 }, { "epoch": 0.4941326734254011, "grad_norm": 0.32936912303458876, "learning_rate": 1.067558286918719e-05, "loss": 0.1411, "step": 6190 }, { "epoch": 0.49421250099784464, "grad_norm": 0.29922703617183366, "learning_rate": 1.0673003293390748e-05, "loss": 0.1833, "step": 6191 }, { "epoch": 0.49429232857028815, "grad_norm": 0.25412668243621406, "learning_rate": 1.0670423672606702e-05, "loss": 0.2032, "step": 6192 }, { "epoch": 0.4943721561427317, "grad_norm": 0.29104249220617073, "learning_rate": 1.0667844007007487e-05, "loss": 0.1646, "step": 6193 }, { "epoch": 0.49445198371517524, "grad_norm": 0.317725302760875, "learning_rate": 1.0665264296765541e-05, "loss": 0.1898, "step": 6194 }, { "epoch": 0.49453181128761875, "grad_norm": 0.2803367657533948, "learning_rate": 1.066268454205331e-05, "loss": 0.1831, "step": 6195 }, { "epoch": 0.49461163886006226, "grad_norm": 0.3341072126474013, "learning_rate": 1.066010474304324e-05, "loss": 0.1749, "step": 6196 }, { "epoch": 0.4946914664325058, "grad_norm": 0.34183888526592815, "learning_rate": 1.0657524899907781e-05, "loss": 0.1668, "step": 6197 }, { "epoch": 0.4947712940049493, "grad_norm": 0.27503624263941356, "learning_rate": 1.0654945012819385e-05, "loss": 0.1568, "step": 6198 }, { "epoch": 0.4948511215773928, "grad_norm": 0.31640546838203665, "learning_rate": 1.0652365081950506e-05, "loss": 0.1726, "step": 6199 }, { "epoch": 0.4949309491498364, "grad_norm": 0.28870244513314736, "learning_rate": 1.06497851074736e-05, "loss": 0.2033, "step": 6200 }, { "epoch": 0.4950107767222799, "grad_norm": 0.30928837980907664, "learning_rate": 1.0647205089561136e-05, "loss": 0.2079, "step": 6201 }, { "epoch": 0.4950906042947234, "grad_norm": 0.26827149229336267, "learning_rate": 1.064462502838557e-05, "loss": 0.1555, "step": 6202 }, { "epoch": 0.4951704318671669, "grad_norm": 0.28404903884339744, "learning_rate": 1.0642044924119372e-05, "loss": 0.1874, "step": 6203 }, { "epoch": 0.49525025943961043, "grad_norm": 0.2547273633218579, "learning_rate": 1.0639464776935011e-05, "loss": 0.2198, "step": 6204 }, { "epoch": 0.49533008701205394, "grad_norm": 0.2772348295017586, "learning_rate": 1.0636884587004963e-05, "loss": 0.1837, "step": 6205 }, { "epoch": 0.4954099145844975, "grad_norm": 0.2916971493427168, "learning_rate": 1.0634304354501701e-05, "loss": 0.1904, "step": 6206 }, { "epoch": 0.495489742156941, "grad_norm": 0.36181670468098076, "learning_rate": 1.0631724079597701e-05, "loss": 0.1564, "step": 6207 }, { "epoch": 0.49556956972938454, "grad_norm": 0.28803272283917436, "learning_rate": 1.0629143762465448e-05, "loss": 0.1788, "step": 6208 }, { "epoch": 0.49564939730182805, "grad_norm": 0.3562405963391089, "learning_rate": 1.062656340327742e-05, "loss": 0.1544, "step": 6209 }, { "epoch": 0.49572922487427157, "grad_norm": 0.3505006959924791, "learning_rate": 1.0623983002206113e-05, "loss": 0.1491, "step": 6210 }, { "epoch": 0.4958090524467151, "grad_norm": 0.3708260204893012, "learning_rate": 1.0621402559424008e-05, "loss": 0.1761, "step": 6211 }, { "epoch": 0.4958888800191586, "grad_norm": 0.33365312748112574, "learning_rate": 1.0618822075103604e-05, "loss": 0.1669, "step": 6212 }, { "epoch": 0.49596870759160216, "grad_norm": 0.2557592751819954, "learning_rate": 1.0616241549417392e-05, "loss": 0.1725, "step": 6213 }, { "epoch": 0.4960485351640457, "grad_norm": 0.329811724523595, "learning_rate": 1.0613660982537875e-05, "loss": 0.1698, "step": 6214 }, { "epoch": 0.4961283627364892, "grad_norm": 0.2997293447791246, "learning_rate": 1.0611080374637544e-05, "loss": 0.1754, "step": 6215 }, { "epoch": 0.4962081903089327, "grad_norm": 0.3195499638318571, "learning_rate": 1.060849972588891e-05, "loss": 0.1928, "step": 6216 }, { "epoch": 0.4962880178813762, "grad_norm": 0.30850754685419357, "learning_rate": 1.0605919036464475e-05, "loss": 0.1553, "step": 6217 }, { "epoch": 0.49636784545381973, "grad_norm": 0.319822262014517, "learning_rate": 1.0603338306536758e-05, "loss": 0.1055, "step": 6218 }, { "epoch": 0.49644767302626325, "grad_norm": 0.2728991992910284, "learning_rate": 1.0600757536278254e-05, "loss": 0.2011, "step": 6219 }, { "epoch": 0.4965275005987068, "grad_norm": 0.31747889010102753, "learning_rate": 1.0598176725861492e-05, "loss": 0.167, "step": 6220 }, { "epoch": 0.49660732817115033, "grad_norm": 0.30378859882005227, "learning_rate": 1.0595595875458982e-05, "loss": 0.15, "step": 6221 }, { "epoch": 0.49668715574359384, "grad_norm": 0.44428678023765183, "learning_rate": 1.0593014985243242e-05, "loss": 0.1806, "step": 6222 }, { "epoch": 0.49676698331603736, "grad_norm": 0.33797959794349647, "learning_rate": 1.05904340553868e-05, "loss": 0.1642, "step": 6223 }, { "epoch": 0.49684681088848087, "grad_norm": 0.31928814920704957, "learning_rate": 1.0587853086062177e-05, "loss": 0.1947, "step": 6224 }, { "epoch": 0.4969266384609244, "grad_norm": 0.3458483203058485, "learning_rate": 1.0585272077441902e-05, "loss": 0.1776, "step": 6225 }, { "epoch": 0.4970064660333679, "grad_norm": 0.3043800810323571, "learning_rate": 1.0582691029698501e-05, "loss": 0.1948, "step": 6226 }, { "epoch": 0.49708629360581147, "grad_norm": 0.3315296471289513, "learning_rate": 1.0580109943004518e-05, "loss": 0.1624, "step": 6227 }, { "epoch": 0.497166121178255, "grad_norm": 0.29055370143371734, "learning_rate": 1.0577528817532477e-05, "loss": 0.1576, "step": 6228 }, { "epoch": 0.4972459487506985, "grad_norm": 0.32750464566659115, "learning_rate": 1.057494765345492e-05, "loss": 0.1984, "step": 6229 }, { "epoch": 0.497325776323142, "grad_norm": 0.28803367462059953, "learning_rate": 1.0572366450944387e-05, "loss": 0.1479, "step": 6230 }, { "epoch": 0.4974056038955855, "grad_norm": 0.31507362071809886, "learning_rate": 1.0569785210173422e-05, "loss": 0.153, "step": 6231 }, { "epoch": 0.49748543146802904, "grad_norm": 0.34413691231653903, "learning_rate": 1.0567203931314571e-05, "loss": 0.1474, "step": 6232 }, { "epoch": 0.49756525904047255, "grad_norm": 0.44036405584317256, "learning_rate": 1.0564622614540381e-05, "loss": 0.2033, "step": 6233 }, { "epoch": 0.4976450866129161, "grad_norm": 0.31833944838567985, "learning_rate": 1.0562041260023406e-05, "loss": 0.1382, "step": 6234 }, { "epoch": 0.49772491418535963, "grad_norm": 0.26115067194701586, "learning_rate": 1.0559459867936195e-05, "loss": 0.2022, "step": 6235 }, { "epoch": 0.49780474175780315, "grad_norm": 0.32914215569711663, "learning_rate": 1.0556878438451308e-05, "loss": 0.1543, "step": 6236 }, { "epoch": 0.49788456933024666, "grad_norm": 0.3242249499271022, "learning_rate": 1.05542969717413e-05, "loss": 0.1477, "step": 6237 }, { "epoch": 0.4979643969026902, "grad_norm": 0.3234805888659363, "learning_rate": 1.0551715467978734e-05, "loss": 0.1666, "step": 6238 }, { "epoch": 0.4980442244751337, "grad_norm": 0.29549510760946224, "learning_rate": 1.0549133927336167e-05, "loss": 0.1473, "step": 6239 }, { "epoch": 0.49812405204757726, "grad_norm": 0.3629379245025391, "learning_rate": 1.0546552349986177e-05, "loss": 0.1454, "step": 6240 }, { "epoch": 0.4982038796200208, "grad_norm": 0.34839347823811845, "learning_rate": 1.054397073610132e-05, "loss": 0.1474, "step": 6241 }, { "epoch": 0.4982837071924643, "grad_norm": 0.3101956808863822, "learning_rate": 1.0541389085854177e-05, "loss": 0.1961, "step": 6242 }, { "epoch": 0.4983635347649078, "grad_norm": 0.31100937384917876, "learning_rate": 1.0538807399417315e-05, "loss": 0.1514, "step": 6243 }, { "epoch": 0.4984433623373513, "grad_norm": 0.42687725254069137, "learning_rate": 1.0536225676963308e-05, "loss": 0.1526, "step": 6244 }, { "epoch": 0.49852318990979483, "grad_norm": 0.301723497807046, "learning_rate": 1.0533643918664738e-05, "loss": 0.1704, "step": 6245 }, { "epoch": 0.49860301748223834, "grad_norm": 0.30258126452263856, "learning_rate": 1.0531062124694183e-05, "loss": 0.1461, "step": 6246 }, { "epoch": 0.4986828450546819, "grad_norm": 0.2554805270828899, "learning_rate": 1.0528480295224228e-05, "loss": 0.1549, "step": 6247 }, { "epoch": 0.4987626726271254, "grad_norm": 0.3166677023434743, "learning_rate": 1.0525898430427454e-05, "loss": 0.1925, "step": 6248 }, { "epoch": 0.49884250019956894, "grad_norm": 0.28292636921696823, "learning_rate": 1.0523316530476455e-05, "loss": 0.2074, "step": 6249 }, { "epoch": 0.49892232777201245, "grad_norm": 0.2708841503182726, "learning_rate": 1.0520734595543814e-05, "loss": 0.1474, "step": 6250 }, { "epoch": 0.49900215534445597, "grad_norm": 0.24635948087881698, "learning_rate": 1.0518152625802132e-05, "loss": 0.205, "step": 6251 }, { "epoch": 0.4990819829168995, "grad_norm": 0.32995204040889575, "learning_rate": 1.051557062142399e-05, "loss": 0.177, "step": 6252 }, { "epoch": 0.499161810489343, "grad_norm": 0.2838665496809886, "learning_rate": 1.0512988582581993e-05, "loss": 0.1817, "step": 6253 }, { "epoch": 0.49924163806178656, "grad_norm": 0.2910011788486958, "learning_rate": 1.0510406509448744e-05, "loss": 0.203, "step": 6254 }, { "epoch": 0.4993214656342301, "grad_norm": 0.27069843189173165, "learning_rate": 1.0507824402196836e-05, "loss": 0.1901, "step": 6255 }, { "epoch": 0.4994012932066736, "grad_norm": 0.3077431356142301, "learning_rate": 1.0505242260998878e-05, "loss": 0.1607, "step": 6256 }, { "epoch": 0.4994811207791171, "grad_norm": 0.2695799234803043, "learning_rate": 1.0502660086027472e-05, "loss": 0.183, "step": 6257 }, { "epoch": 0.4995609483515606, "grad_norm": 0.2846045304262556, "learning_rate": 1.0500077877455233e-05, "loss": 0.1855, "step": 6258 }, { "epoch": 0.49964077592400413, "grad_norm": 0.2921828735499565, "learning_rate": 1.0497495635454765e-05, "loss": 0.1754, "step": 6259 }, { "epoch": 0.49972060349644765, "grad_norm": 0.3305942782397816, "learning_rate": 1.0494913360198681e-05, "loss": 0.1918, "step": 6260 }, { "epoch": 0.4998004310688912, "grad_norm": 0.28544610991025154, "learning_rate": 1.0492331051859597e-05, "loss": 0.1691, "step": 6261 }, { "epoch": 0.49988025864133473, "grad_norm": 0.32487473607248285, "learning_rate": 1.0489748710610133e-05, "loss": 0.2101, "step": 6262 }, { "epoch": 0.49996008621377824, "grad_norm": 0.2744708100356994, "learning_rate": 1.0487166336622903e-05, "loss": 0.152, "step": 6263 }, { "epoch": 0.5000399137862218, "grad_norm": 0.2891887063572515, "learning_rate": 1.0484583930070533e-05, "loss": 0.108, "step": 6264 }, { "epoch": 0.5001197413586653, "grad_norm": 0.3568501796534887, "learning_rate": 1.0482001491125644e-05, "loss": 0.1673, "step": 6265 }, { "epoch": 0.5001995689311088, "grad_norm": 0.29467284975384733, "learning_rate": 1.0479419019960865e-05, "loss": 0.1665, "step": 6266 }, { "epoch": 0.5002793965035524, "grad_norm": 0.2717916554890001, "learning_rate": 1.047683651674882e-05, "loss": 0.1545, "step": 6267 }, { "epoch": 0.5003592240759959, "grad_norm": 0.24394167995255647, "learning_rate": 1.0474253981662141e-05, "loss": 0.1462, "step": 6268 }, { "epoch": 0.5004390516484394, "grad_norm": 0.24056240076066246, "learning_rate": 1.047167141487346e-05, "loss": 0.1407, "step": 6269 }, { "epoch": 0.5005188792208829, "grad_norm": 0.33729579227723694, "learning_rate": 1.0469088816555413e-05, "loss": 0.1696, "step": 6270 }, { "epoch": 0.5005987067933264, "grad_norm": 0.2582229316770696, "learning_rate": 1.0466506186880634e-05, "loss": 0.1187, "step": 6271 }, { "epoch": 0.5006785343657699, "grad_norm": 0.30815321895056713, "learning_rate": 1.0463923526021763e-05, "loss": 0.1663, "step": 6272 }, { "epoch": 0.5007583619382134, "grad_norm": 0.2821649575329939, "learning_rate": 1.0461340834151442e-05, "loss": 0.1723, "step": 6273 }, { "epoch": 0.500838189510657, "grad_norm": 0.3058335727470528, "learning_rate": 1.045875811144231e-05, "loss": 0.1542, "step": 6274 }, { "epoch": 0.5009180170831005, "grad_norm": 0.2833968537970787, "learning_rate": 1.0456175358067014e-05, "loss": 0.1632, "step": 6275 }, { "epoch": 0.500997844655544, "grad_norm": 0.2647496780414667, "learning_rate": 1.0453592574198198e-05, "loss": 0.2184, "step": 6276 }, { "epoch": 0.5010776722279875, "grad_norm": 0.3041970284293414, "learning_rate": 1.0451009760008515e-05, "loss": 0.1479, "step": 6277 }, { "epoch": 0.5011574998004311, "grad_norm": 0.28472179886311105, "learning_rate": 1.0448426915670615e-05, "loss": 0.1753, "step": 6278 }, { "epoch": 0.5012373273728746, "grad_norm": 0.2912928988018832, "learning_rate": 1.0445844041357149e-05, "loss": 0.1761, "step": 6279 }, { "epoch": 0.5013171549453181, "grad_norm": 0.3102114927876932, "learning_rate": 1.0443261137240776e-05, "loss": 0.1621, "step": 6280 }, { "epoch": 0.5013969825177617, "grad_norm": 0.2706059506446933, "learning_rate": 1.044067820349415e-05, "loss": 0.2077, "step": 6281 }, { "epoch": 0.5014768100902052, "grad_norm": 0.26658699567569094, "learning_rate": 1.0438095240289929e-05, "loss": 0.1233, "step": 6282 }, { "epoch": 0.5015566376626487, "grad_norm": 0.2763593282661241, "learning_rate": 1.0435512247800773e-05, "loss": 0.1625, "step": 6283 }, { "epoch": 0.5016364652350922, "grad_norm": 0.29761465526826364, "learning_rate": 1.0432929226199348e-05, "loss": 0.1887, "step": 6284 }, { "epoch": 0.5017162928075357, "grad_norm": 0.28401578894428686, "learning_rate": 1.043034617565832e-05, "loss": 0.1486, "step": 6285 }, { "epoch": 0.5017961203799792, "grad_norm": 0.28641538287972407, "learning_rate": 1.042776309635035e-05, "loss": 0.1881, "step": 6286 }, { "epoch": 0.5018759479524227, "grad_norm": 0.3143816929277833, "learning_rate": 1.0425179988448109e-05, "loss": 0.1962, "step": 6287 }, { "epoch": 0.5019557755248663, "grad_norm": 0.2871893022006096, "learning_rate": 1.042259685212427e-05, "loss": 0.1988, "step": 6288 }, { "epoch": 0.5020356030973098, "grad_norm": 0.2930127023196749, "learning_rate": 1.0420013687551506e-05, "loss": 0.1514, "step": 6289 }, { "epoch": 0.5021154306697533, "grad_norm": 0.27764763915218404, "learning_rate": 1.0417430494902488e-05, "loss": 0.1584, "step": 6290 }, { "epoch": 0.5021952582421969, "grad_norm": 0.29073526144746786, "learning_rate": 1.0414847274349893e-05, "loss": 0.1786, "step": 6291 }, { "epoch": 0.5022750858146404, "grad_norm": 0.24908512539098335, "learning_rate": 1.0412264026066397e-05, "loss": 0.1732, "step": 6292 }, { "epoch": 0.5023549133870839, "grad_norm": 0.29016171600005264, "learning_rate": 1.0409680750224686e-05, "loss": 0.1589, "step": 6293 }, { "epoch": 0.5024347409595274, "grad_norm": 0.30899643509213587, "learning_rate": 1.0407097446997436e-05, "loss": 0.1872, "step": 6294 }, { "epoch": 0.502514568531971, "grad_norm": 0.27400245308965376, "learning_rate": 1.0404514116557334e-05, "loss": 0.1663, "step": 6295 }, { "epoch": 0.5025943961044145, "grad_norm": 0.26736291874105794, "learning_rate": 1.0401930759077063e-05, "loss": 0.1294, "step": 6296 }, { "epoch": 0.502674223676858, "grad_norm": 0.27721228295019057, "learning_rate": 1.0399347374729315e-05, "loss": 0.201, "step": 6297 }, { "epoch": 0.5027540512493015, "grad_norm": 0.3109465173885593, "learning_rate": 1.039676396368677e-05, "loss": 0.154, "step": 6298 }, { "epoch": 0.502833878821745, "grad_norm": 0.2966657391937635, "learning_rate": 1.0394180526122126e-05, "loss": 0.1578, "step": 6299 }, { "epoch": 0.5029137063941885, "grad_norm": 0.2863488157410633, "learning_rate": 1.0391597062208074e-05, "loss": 0.1991, "step": 6300 }, { "epoch": 0.502993533966632, "grad_norm": 0.3848983586489616, "learning_rate": 1.0389013572117309e-05, "loss": 0.1717, "step": 6301 }, { "epoch": 0.5030733615390756, "grad_norm": 0.2640019307047574, "learning_rate": 1.0386430056022525e-05, "loss": 0.171, "step": 6302 }, { "epoch": 0.5031531891115191, "grad_norm": 0.3080726406652322, "learning_rate": 1.0383846514096423e-05, "loss": 0.1949, "step": 6303 }, { "epoch": 0.5032330166839626, "grad_norm": 0.2679207953160398, "learning_rate": 1.03812629465117e-05, "loss": 0.1841, "step": 6304 }, { "epoch": 0.5033128442564062, "grad_norm": 0.3151940622369751, "learning_rate": 1.0378679353441055e-05, "loss": 0.1788, "step": 6305 }, { "epoch": 0.5033926718288497, "grad_norm": 0.3015853916263842, "learning_rate": 1.0376095735057198e-05, "loss": 0.1698, "step": 6306 }, { "epoch": 0.5034724994012932, "grad_norm": 0.30603045230370407, "learning_rate": 1.0373512091532824e-05, "loss": 0.2201, "step": 6307 }, { "epoch": 0.5035523269737368, "grad_norm": 0.28207986775896166, "learning_rate": 1.0370928423040649e-05, "loss": 0.1818, "step": 6308 }, { "epoch": 0.5036321545461803, "grad_norm": 0.2615312740948291, "learning_rate": 1.0368344729753374e-05, "loss": 0.1417, "step": 6309 }, { "epoch": 0.5037119821186238, "grad_norm": 0.3302323473738448, "learning_rate": 1.0365761011843716e-05, "loss": 0.1484, "step": 6310 }, { "epoch": 0.5037918096910673, "grad_norm": 0.2965133553614229, "learning_rate": 1.0363177269484379e-05, "loss": 0.1602, "step": 6311 }, { "epoch": 0.5038716372635108, "grad_norm": 0.29392506659209466, "learning_rate": 1.0360593502848078e-05, "loss": 0.157, "step": 6312 }, { "epoch": 0.5039514648359543, "grad_norm": 0.3444002794721201, "learning_rate": 1.035800971210753e-05, "loss": 0.1679, "step": 6313 }, { "epoch": 0.5040312924083978, "grad_norm": 0.29238903659119975, "learning_rate": 1.0355425897435447e-05, "loss": 0.163, "step": 6314 }, { "epoch": 0.5041111199808413, "grad_norm": 0.2846618022703277, "learning_rate": 1.0352842059004552e-05, "loss": 0.1789, "step": 6315 }, { "epoch": 0.5041909475532849, "grad_norm": 0.27913431404503697, "learning_rate": 1.0350258196987561e-05, "loss": 0.138, "step": 6316 }, { "epoch": 0.5042707751257284, "grad_norm": 0.2835943655442418, "learning_rate": 1.0347674311557197e-05, "loss": 0.1798, "step": 6317 }, { "epoch": 0.504350602698172, "grad_norm": 0.2501746027433234, "learning_rate": 1.034509040288618e-05, "loss": 0.1744, "step": 6318 }, { "epoch": 0.5044304302706155, "grad_norm": 0.24988943850363143, "learning_rate": 1.0342506471147236e-05, "loss": 0.1688, "step": 6319 }, { "epoch": 0.504510257843059, "grad_norm": 0.2958769958982466, "learning_rate": 1.0339922516513087e-05, "loss": 0.1526, "step": 6320 }, { "epoch": 0.5045900854155025, "grad_norm": 0.33948255572724334, "learning_rate": 1.0337338539156465e-05, "loss": 0.1744, "step": 6321 }, { "epoch": 0.5046699129879461, "grad_norm": 0.3229739264393325, "learning_rate": 1.0334754539250096e-05, "loss": 0.2274, "step": 6322 }, { "epoch": 0.5047497405603896, "grad_norm": 0.3208821941831054, "learning_rate": 1.033217051696671e-05, "loss": 0.2157, "step": 6323 }, { "epoch": 0.5048295681328331, "grad_norm": 0.26864596141749975, "learning_rate": 1.0329586472479042e-05, "loss": 0.1728, "step": 6324 }, { "epoch": 0.5049093957052766, "grad_norm": 0.2631157489999864, "learning_rate": 1.0327002405959824e-05, "loss": 0.1686, "step": 6325 }, { "epoch": 0.5049892232777201, "grad_norm": 0.32202267077611607, "learning_rate": 1.0324418317581786e-05, "loss": 0.1844, "step": 6326 }, { "epoch": 0.5050690508501636, "grad_norm": 0.324485539041607, "learning_rate": 1.0321834207517668e-05, "loss": 0.188, "step": 6327 }, { "epoch": 0.5051488784226071, "grad_norm": 0.2817052626762404, "learning_rate": 1.0319250075940206e-05, "loss": 0.1822, "step": 6328 }, { "epoch": 0.5052287059950507, "grad_norm": 0.3123597712136166, "learning_rate": 1.031666592302214e-05, "loss": 0.1643, "step": 6329 }, { "epoch": 0.5053085335674942, "grad_norm": 0.27009068383139484, "learning_rate": 1.0314081748936211e-05, "loss": 0.1649, "step": 6330 }, { "epoch": 0.5053883611399377, "grad_norm": 0.3447635176894219, "learning_rate": 1.0311497553855159e-05, "loss": 0.1679, "step": 6331 }, { "epoch": 0.5054681887123813, "grad_norm": 0.2642357663227884, "learning_rate": 1.0308913337951732e-05, "loss": 0.1799, "step": 6332 }, { "epoch": 0.5055480162848248, "grad_norm": 0.38280657926977435, "learning_rate": 1.0306329101398667e-05, "loss": 0.1819, "step": 6333 }, { "epoch": 0.5056278438572683, "grad_norm": 0.31628472431435917, "learning_rate": 1.0303744844368715e-05, "loss": 0.1681, "step": 6334 }, { "epoch": 0.5057076714297118, "grad_norm": 0.2815097752360896, "learning_rate": 1.0301160567034622e-05, "loss": 0.2302, "step": 6335 }, { "epoch": 0.5057874990021554, "grad_norm": 0.28201189447510805, "learning_rate": 1.0298576269569136e-05, "loss": 0.1557, "step": 6336 }, { "epoch": 0.5058673265745989, "grad_norm": 0.2737301219952982, "learning_rate": 1.0295991952145011e-05, "loss": 0.1526, "step": 6337 }, { "epoch": 0.5059471541470424, "grad_norm": 0.24857870381177025, "learning_rate": 1.029340761493499e-05, "loss": 0.1794, "step": 6338 }, { "epoch": 0.5060269817194859, "grad_norm": 0.33041220120406617, "learning_rate": 1.0290823258111838e-05, "loss": 0.1944, "step": 6339 }, { "epoch": 0.5061068092919294, "grad_norm": 0.35205359898646826, "learning_rate": 1.0288238881848298e-05, "loss": 0.1794, "step": 6340 }, { "epoch": 0.5061866368643729, "grad_norm": 0.3097907640215034, "learning_rate": 1.028565448631713e-05, "loss": 0.2007, "step": 6341 }, { "epoch": 0.5062664644368164, "grad_norm": 0.3136441962789258, "learning_rate": 1.0283070071691091e-05, "loss": 0.1507, "step": 6342 }, { "epoch": 0.50634629200926, "grad_norm": 0.27937175928110675, "learning_rate": 1.0280485638142937e-05, "loss": 0.1581, "step": 6343 }, { "epoch": 0.5064261195817035, "grad_norm": 0.30371460954538715, "learning_rate": 1.0277901185845427e-05, "loss": 0.1594, "step": 6344 }, { "epoch": 0.5065059471541471, "grad_norm": 0.3464098219604447, "learning_rate": 1.0275316714971324e-05, "loss": 0.1567, "step": 6345 }, { "epoch": 0.5065857747265906, "grad_norm": 0.3329611725042626, "learning_rate": 1.0272732225693384e-05, "loss": 0.1748, "step": 6346 }, { "epoch": 0.5066656022990341, "grad_norm": 0.3206295914747823, "learning_rate": 1.0270147718184378e-05, "loss": 0.1443, "step": 6347 }, { "epoch": 0.5067454298714776, "grad_norm": 0.30322109295173516, "learning_rate": 1.026756319261707e-05, "loss": 0.1846, "step": 6348 }, { "epoch": 0.5068252574439212, "grad_norm": 0.29508500534598925, "learning_rate": 1.0264978649164215e-05, "loss": 0.1736, "step": 6349 }, { "epoch": 0.5069050850163647, "grad_norm": 0.29971632515959334, "learning_rate": 1.0262394087998589e-05, "loss": 0.1732, "step": 6350 }, { "epoch": 0.5069849125888082, "grad_norm": 0.3344376090162501, "learning_rate": 1.0259809509292955e-05, "loss": 0.2041, "step": 6351 }, { "epoch": 0.5070647401612517, "grad_norm": 0.29854474531522057, "learning_rate": 1.0257224913220084e-05, "loss": 0.2157, "step": 6352 }, { "epoch": 0.5071445677336952, "grad_norm": 0.3470145412169554, "learning_rate": 1.0254640299952745e-05, "loss": 0.1998, "step": 6353 }, { "epoch": 0.5072243953061387, "grad_norm": 0.3108097022371574, "learning_rate": 1.0252055669663713e-05, "loss": 0.1596, "step": 6354 }, { "epoch": 0.5073042228785822, "grad_norm": 0.3095542909580449, "learning_rate": 1.0249471022525755e-05, "loss": 0.184, "step": 6355 }, { "epoch": 0.5073840504510257, "grad_norm": 0.31472961588019543, "learning_rate": 1.0246886358711643e-05, "loss": 0.1494, "step": 6356 }, { "epoch": 0.5074638780234693, "grad_norm": 0.35723417707072197, "learning_rate": 1.024430167839416e-05, "loss": 0.1664, "step": 6357 }, { "epoch": 0.5075437055959128, "grad_norm": 0.3087340672695175, "learning_rate": 1.0241716981746073e-05, "loss": 0.1814, "step": 6358 }, { "epoch": 0.5076235331683564, "grad_norm": 0.373749142905889, "learning_rate": 1.0239132268940165e-05, "loss": 0.1871, "step": 6359 }, { "epoch": 0.5077033607407999, "grad_norm": 0.3380739673570448, "learning_rate": 1.0236547540149208e-05, "loss": 0.1416, "step": 6360 }, { "epoch": 0.5077831883132434, "grad_norm": 0.26782555736821245, "learning_rate": 1.0233962795545987e-05, "loss": 0.1522, "step": 6361 }, { "epoch": 0.5078630158856869, "grad_norm": 0.284988426217731, "learning_rate": 1.0231378035303276e-05, "loss": 0.1585, "step": 6362 }, { "epoch": 0.5079428434581305, "grad_norm": 0.3433345283248454, "learning_rate": 1.0228793259593865e-05, "loss": 0.1424, "step": 6363 }, { "epoch": 0.508022671030574, "grad_norm": 0.3142188056022053, "learning_rate": 1.0226208468590525e-05, "loss": 0.1355, "step": 6364 }, { "epoch": 0.5081024986030175, "grad_norm": 0.3896818484400576, "learning_rate": 1.0223623662466045e-05, "loss": 0.1908, "step": 6365 }, { "epoch": 0.508182326175461, "grad_norm": 0.2623495330492631, "learning_rate": 1.0221038841393207e-05, "loss": 0.1286, "step": 6366 }, { "epoch": 0.5082621537479045, "grad_norm": 0.24420789152251265, "learning_rate": 1.02184540055448e-05, "loss": 0.216, "step": 6367 }, { "epoch": 0.508341981320348, "grad_norm": 0.30263345647173545, "learning_rate": 1.0215869155093605e-05, "loss": 0.1568, "step": 6368 }, { "epoch": 0.5084218088927915, "grad_norm": 0.2954659756775988, "learning_rate": 1.0213284290212411e-05, "loss": 0.1696, "step": 6369 }, { "epoch": 0.508501636465235, "grad_norm": 0.28039916150072225, "learning_rate": 1.021069941107401e-05, "loss": 0.1228, "step": 6370 }, { "epoch": 0.5085814640376786, "grad_norm": 0.29424563114961894, "learning_rate": 1.0208114517851181e-05, "loss": 0.1615, "step": 6371 }, { "epoch": 0.5086612916101222, "grad_norm": 0.3145038236365229, "learning_rate": 1.0205529610716727e-05, "loss": 0.1531, "step": 6372 }, { "epoch": 0.5087411191825657, "grad_norm": 0.3558388995992025, "learning_rate": 1.0202944689843426e-05, "loss": 0.1511, "step": 6373 }, { "epoch": 0.5088209467550092, "grad_norm": 0.30896350822681146, "learning_rate": 1.0200359755404078e-05, "loss": 0.1385, "step": 6374 }, { "epoch": 0.5089007743274527, "grad_norm": 0.34550577561221885, "learning_rate": 1.0197774807571473e-05, "loss": 0.1415, "step": 6375 }, { "epoch": 0.5089806018998962, "grad_norm": 0.30727252239538244, "learning_rate": 1.0195189846518405e-05, "loss": 0.1551, "step": 6376 }, { "epoch": 0.5090604294723398, "grad_norm": 0.31643474337855987, "learning_rate": 1.0192604872417671e-05, "loss": 0.1801, "step": 6377 }, { "epoch": 0.5091402570447833, "grad_norm": 0.2945691069885287, "learning_rate": 1.0190019885442057e-05, "loss": 0.1535, "step": 6378 }, { "epoch": 0.5092200846172268, "grad_norm": 0.2697145146389506, "learning_rate": 1.018743488576437e-05, "loss": 0.1993, "step": 6379 }, { "epoch": 0.5092999121896703, "grad_norm": 0.2751610040235751, "learning_rate": 1.0184849873557403e-05, "loss": 0.1553, "step": 6380 }, { "epoch": 0.5093797397621138, "grad_norm": 0.2809981833548066, "learning_rate": 1.0182264848993953e-05, "loss": 0.1694, "step": 6381 }, { "epoch": 0.5094595673345573, "grad_norm": 0.30465727010446697, "learning_rate": 1.0179679812246816e-05, "loss": 0.1875, "step": 6382 }, { "epoch": 0.5095393949070008, "grad_norm": 0.26944908790898303, "learning_rate": 1.01770947634888e-05, "loss": 0.1596, "step": 6383 }, { "epoch": 0.5096192224794444, "grad_norm": 0.3148040372183131, "learning_rate": 1.0174509702892697e-05, "loss": 0.1926, "step": 6384 }, { "epoch": 0.5096990500518879, "grad_norm": 0.3550031389153748, "learning_rate": 1.0171924630631314e-05, "loss": 0.1864, "step": 6385 }, { "epoch": 0.5097788776243315, "grad_norm": 0.41650717479534916, "learning_rate": 1.0169339546877443e-05, "loss": 0.1788, "step": 6386 }, { "epoch": 0.509858705196775, "grad_norm": 0.2708493752697227, "learning_rate": 1.01667544518039e-05, "loss": 0.1784, "step": 6387 }, { "epoch": 0.5099385327692185, "grad_norm": 0.27367012128173346, "learning_rate": 1.0164169345583476e-05, "loss": 0.1407, "step": 6388 }, { "epoch": 0.510018360341662, "grad_norm": 0.2800919316467052, "learning_rate": 1.0161584228388987e-05, "loss": 0.1893, "step": 6389 }, { "epoch": 0.5100981879141055, "grad_norm": 0.2974500402647826, "learning_rate": 1.0158999100393228e-05, "loss": 0.1532, "step": 6390 }, { "epoch": 0.5101780154865491, "grad_norm": 0.2795690114604502, "learning_rate": 1.015641396176901e-05, "loss": 0.164, "step": 6391 }, { "epoch": 0.5102578430589926, "grad_norm": 0.3215554668705874, "learning_rate": 1.0153828812689137e-05, "loss": 0.2099, "step": 6392 }, { "epoch": 0.5103376706314361, "grad_norm": 0.3335561235343437, "learning_rate": 1.0151243653326417e-05, "loss": 0.1603, "step": 6393 }, { "epoch": 0.5104174982038796, "grad_norm": 0.2742419496615939, "learning_rate": 1.0148658483853658e-05, "loss": 0.1552, "step": 6394 }, { "epoch": 0.5104973257763231, "grad_norm": 0.2902558164806287, "learning_rate": 1.0146073304443664e-05, "loss": 0.1833, "step": 6395 }, { "epoch": 0.5105771533487666, "grad_norm": 0.2595404308156642, "learning_rate": 1.014348811526925e-05, "loss": 0.1715, "step": 6396 }, { "epoch": 0.5106569809212101, "grad_norm": 0.3167988606327296, "learning_rate": 1.0140902916503224e-05, "loss": 0.1619, "step": 6397 }, { "epoch": 0.5107368084936537, "grad_norm": 0.28372085709638106, "learning_rate": 1.0138317708318394e-05, "loss": 0.166, "step": 6398 }, { "epoch": 0.5108166360660973, "grad_norm": 0.2937681535575845, "learning_rate": 1.0135732490887573e-05, "loss": 0.1374, "step": 6399 }, { "epoch": 0.5108964636385408, "grad_norm": 0.29038564991650173, "learning_rate": 1.0133147264383575e-05, "loss": 0.1344, "step": 6400 }, { "epoch": 0.5109762912109843, "grad_norm": 0.3031538702586447, "learning_rate": 1.0130562028979206e-05, "loss": 0.17, "step": 6401 }, { "epoch": 0.5110561187834278, "grad_norm": 0.2990220493772894, "learning_rate": 1.0127976784847284e-05, "loss": 0.1574, "step": 6402 }, { "epoch": 0.5111359463558713, "grad_norm": 0.2625660880435763, "learning_rate": 1.012539153216062e-05, "loss": 0.1735, "step": 6403 }, { "epoch": 0.5112157739283149, "grad_norm": 0.2992882458541399, "learning_rate": 1.0122806271092028e-05, "loss": 0.2077, "step": 6404 }, { "epoch": 0.5112956015007584, "grad_norm": 0.32227883641448446, "learning_rate": 1.0120221001814323e-05, "loss": 0.1854, "step": 6405 }, { "epoch": 0.5113754290732019, "grad_norm": 0.27800209448592883, "learning_rate": 1.011763572450032e-05, "loss": 0.1848, "step": 6406 }, { "epoch": 0.5114552566456454, "grad_norm": 0.3033118464650473, "learning_rate": 1.0115050439322836e-05, "loss": 0.1823, "step": 6407 }, { "epoch": 0.5115350842180889, "grad_norm": 0.29621766941503874, "learning_rate": 1.0112465146454681e-05, "loss": 0.1643, "step": 6408 }, { "epoch": 0.5116149117905324, "grad_norm": 0.2986744332378709, "learning_rate": 1.010987984606868e-05, "loss": 0.186, "step": 6409 }, { "epoch": 0.5116947393629759, "grad_norm": 0.2853316265997015, "learning_rate": 1.0107294538337644e-05, "loss": 0.1673, "step": 6410 }, { "epoch": 0.5117745669354195, "grad_norm": 0.3111462551709488, "learning_rate": 1.0104709223434392e-05, "loss": 0.191, "step": 6411 }, { "epoch": 0.511854394507863, "grad_norm": 0.27746112255583083, "learning_rate": 1.0102123901531744e-05, "loss": 0.1472, "step": 6412 }, { "epoch": 0.5119342220803066, "grad_norm": 0.2825834626928543, "learning_rate": 1.0099538572802518e-05, "loss": 0.1584, "step": 6413 }, { "epoch": 0.5120140496527501, "grad_norm": 0.28989693294614627, "learning_rate": 1.0096953237419532e-05, "loss": 0.1273, "step": 6414 }, { "epoch": 0.5120938772251936, "grad_norm": 0.2717165458791069, "learning_rate": 1.0094367895555605e-05, "loss": 0.1536, "step": 6415 }, { "epoch": 0.5121737047976371, "grad_norm": 0.29270065471084183, "learning_rate": 1.009178254738356e-05, "loss": 0.1736, "step": 6416 }, { "epoch": 0.5122535323700806, "grad_norm": 0.29738257344454083, "learning_rate": 1.008919719307621e-05, "loss": 0.2079, "step": 6417 }, { "epoch": 0.5123333599425242, "grad_norm": 0.24591409818809062, "learning_rate": 1.0086611832806386e-05, "loss": 0.157, "step": 6418 }, { "epoch": 0.5124131875149677, "grad_norm": 0.2892750309283223, "learning_rate": 1.0084026466746899e-05, "loss": 0.1683, "step": 6419 }, { "epoch": 0.5124930150874112, "grad_norm": 0.30245142588473, "learning_rate": 1.0081441095070578e-05, "loss": 0.1409, "step": 6420 }, { "epoch": 0.5125728426598547, "grad_norm": 0.27759267270758436, "learning_rate": 1.0078855717950242e-05, "loss": 0.1658, "step": 6421 }, { "epoch": 0.5126526702322982, "grad_norm": 0.29407243645457487, "learning_rate": 1.0076270335558714e-05, "loss": 0.1677, "step": 6422 }, { "epoch": 0.5127324978047417, "grad_norm": 0.2864417585928312, "learning_rate": 1.0073684948068815e-05, "loss": 0.1724, "step": 6423 }, { "epoch": 0.5128123253771852, "grad_norm": 0.27043810691972237, "learning_rate": 1.0071099555653368e-05, "loss": 0.1662, "step": 6424 }, { "epoch": 0.5128921529496288, "grad_norm": 0.2736405654401569, "learning_rate": 1.00685141584852e-05, "loss": 0.1957, "step": 6425 }, { "epoch": 0.5129719805220723, "grad_norm": 0.3006485792357092, "learning_rate": 1.0065928756737127e-05, "loss": 0.2082, "step": 6426 }, { "epoch": 0.5130518080945159, "grad_norm": 0.27891839006570024, "learning_rate": 1.006334335058198e-05, "loss": 0.1901, "step": 6427 }, { "epoch": 0.5131316356669594, "grad_norm": 0.30676707329073544, "learning_rate": 1.0060757940192581e-05, "loss": 0.1688, "step": 6428 }, { "epoch": 0.5132114632394029, "grad_norm": 0.29848105825147986, "learning_rate": 1.0058172525741754e-05, "loss": 0.1494, "step": 6429 }, { "epoch": 0.5132912908118464, "grad_norm": 0.28384328357928695, "learning_rate": 1.0055587107402324e-05, "loss": 0.201, "step": 6430 }, { "epoch": 0.51337111838429, "grad_norm": 0.30639108743233123, "learning_rate": 1.0053001685347117e-05, "loss": 0.1661, "step": 6431 }, { "epoch": 0.5134509459567335, "grad_norm": 0.28428438708618786, "learning_rate": 1.0050416259748953e-05, "loss": 0.2231, "step": 6432 }, { "epoch": 0.513530773529177, "grad_norm": 0.31502925980842095, "learning_rate": 1.0047830830780665e-05, "loss": 0.1767, "step": 6433 }, { "epoch": 0.5136106011016205, "grad_norm": 0.25819575591255667, "learning_rate": 1.0045245398615074e-05, "loss": 0.1721, "step": 6434 }, { "epoch": 0.513690428674064, "grad_norm": 0.31312308629794156, "learning_rate": 1.0042659963425007e-05, "loss": 0.1678, "step": 6435 }, { "epoch": 0.5137702562465075, "grad_norm": 0.2836292120193043, "learning_rate": 1.0040074525383291e-05, "loss": 0.2079, "step": 6436 }, { "epoch": 0.513850083818951, "grad_norm": 0.2898977154443184, "learning_rate": 1.0037489084662752e-05, "loss": 0.1763, "step": 6437 }, { "epoch": 0.5139299113913945, "grad_norm": 0.30238643831713274, "learning_rate": 1.0034903641436217e-05, "loss": 0.188, "step": 6438 }, { "epoch": 0.5140097389638381, "grad_norm": 0.35570734844270474, "learning_rate": 1.003231819587651e-05, "loss": 0.1879, "step": 6439 }, { "epoch": 0.5140895665362817, "grad_norm": 0.24281156125242204, "learning_rate": 1.002973274815646e-05, "loss": 0.1798, "step": 6440 }, { "epoch": 0.5141693941087252, "grad_norm": 0.30921802501602447, "learning_rate": 1.0027147298448892e-05, "loss": 0.1697, "step": 6441 }, { "epoch": 0.5142492216811687, "grad_norm": 0.2771108831705895, "learning_rate": 1.0024561846926635e-05, "loss": 0.2062, "step": 6442 }, { "epoch": 0.5143290492536122, "grad_norm": 0.2725800004846027, "learning_rate": 1.0021976393762516e-05, "loss": 0.2102, "step": 6443 }, { "epoch": 0.5144088768260557, "grad_norm": 0.3000763934345857, "learning_rate": 1.0019390939129363e-05, "loss": 0.1865, "step": 6444 }, { "epoch": 0.5144887043984993, "grad_norm": 0.32491716754419403, "learning_rate": 1.0016805483200003e-05, "loss": 0.1585, "step": 6445 }, { "epoch": 0.5145685319709428, "grad_norm": 0.3205975129803512, "learning_rate": 1.0014220026147261e-05, "loss": 0.1946, "step": 6446 }, { "epoch": 0.5146483595433863, "grad_norm": 0.26786809659327815, "learning_rate": 1.0011634568143969e-05, "loss": 0.1488, "step": 6447 }, { "epoch": 0.5147281871158298, "grad_norm": 0.2986455041529068, "learning_rate": 1.0009049109362947e-05, "loss": 0.1738, "step": 6448 }, { "epoch": 0.5148080146882733, "grad_norm": 0.3033112118181951, "learning_rate": 1.0006463649977034e-05, "loss": 0.1449, "step": 6449 }, { "epoch": 0.5148878422607168, "grad_norm": 0.3356816765799148, "learning_rate": 1.0003878190159046e-05, "loss": 0.187, "step": 6450 }, { "epoch": 0.5149676698331603, "grad_norm": 0.2757679300224677, "learning_rate": 1.0001292730081822e-05, "loss": 0.1852, "step": 6451 }, { "epoch": 0.5150474974056038, "grad_norm": 0.2927258796874481, "learning_rate": 9.998707269918183e-06, "loss": 0.2005, "step": 6452 }, { "epoch": 0.5151273249780474, "grad_norm": 0.3016599806029626, "learning_rate": 9.996121809840953e-06, "loss": 0.1897, "step": 6453 }, { "epoch": 0.515207152550491, "grad_norm": 0.23901231077905105, "learning_rate": 9.993536350022969e-06, "loss": 0.1865, "step": 6454 }, { "epoch": 0.5152869801229345, "grad_norm": 0.3306928762541168, "learning_rate": 9.990950890637053e-06, "loss": 0.2075, "step": 6455 }, { "epoch": 0.515366807695378, "grad_norm": 0.2989331848875522, "learning_rate": 9.988365431856035e-06, "loss": 0.1754, "step": 6456 }, { "epoch": 0.5154466352678215, "grad_norm": 0.32717741848875354, "learning_rate": 9.985779973852742e-06, "loss": 0.1803, "step": 6457 }, { "epoch": 0.515526462840265, "grad_norm": 0.2759700610465192, "learning_rate": 9.9831945168e-06, "loss": 0.1677, "step": 6458 }, { "epoch": 0.5156062904127086, "grad_norm": 0.3552938974666383, "learning_rate": 9.980609060870639e-06, "loss": 0.2036, "step": 6459 }, { "epoch": 0.5156861179851521, "grad_norm": 0.2883839622074223, "learning_rate": 9.978023606237485e-06, "loss": 0.1653, "step": 6460 }, { "epoch": 0.5157659455575956, "grad_norm": 0.32661679790298265, "learning_rate": 9.975438153073367e-06, "loss": 0.1749, "step": 6461 }, { "epoch": 0.5158457731300391, "grad_norm": 0.3416585806566331, "learning_rate": 9.97285270155111e-06, "loss": 0.2276, "step": 6462 }, { "epoch": 0.5159256007024826, "grad_norm": 0.28118405434375315, "learning_rate": 9.970267251843544e-06, "loss": 0.1825, "step": 6463 }, { "epoch": 0.5160054282749261, "grad_norm": 0.2760094894958044, "learning_rate": 9.967681804123497e-06, "loss": 0.1911, "step": 6464 }, { "epoch": 0.5160852558473696, "grad_norm": 0.2518434626677025, "learning_rate": 9.965096358563786e-06, "loss": 0.1533, "step": 6465 }, { "epoch": 0.5161650834198132, "grad_norm": 0.3076836677278247, "learning_rate": 9.96251091533725e-06, "loss": 0.1879, "step": 6466 }, { "epoch": 0.5162449109922568, "grad_norm": 0.36883247425747123, "learning_rate": 9.95992547461671e-06, "loss": 0.1566, "step": 6467 }, { "epoch": 0.5163247385647003, "grad_norm": 0.2889623439390059, "learning_rate": 9.957340036574994e-06, "loss": 0.1508, "step": 6468 }, { "epoch": 0.5164045661371438, "grad_norm": 0.3026882382974333, "learning_rate": 9.954754601384926e-06, "loss": 0.1796, "step": 6469 }, { "epoch": 0.5164843937095873, "grad_norm": 0.3178817025160252, "learning_rate": 9.952169169219337e-06, "loss": 0.143, "step": 6470 }, { "epoch": 0.5165642212820308, "grad_norm": 0.30443603271157527, "learning_rate": 9.949583740251052e-06, "loss": 0.1443, "step": 6471 }, { "epoch": 0.5166440488544743, "grad_norm": 0.34992077091384555, "learning_rate": 9.946998314652886e-06, "loss": 0.1804, "step": 6472 }, { "epoch": 0.5167238764269179, "grad_norm": 0.2584575847915926, "learning_rate": 9.94441289259768e-06, "loss": 0.1367, "step": 6473 }, { "epoch": 0.5168037039993614, "grad_norm": 0.2855797676972595, "learning_rate": 9.941827474258247e-06, "loss": 0.1429, "step": 6474 }, { "epoch": 0.5168835315718049, "grad_norm": 0.2648103609359927, "learning_rate": 9.939242059807424e-06, "loss": 0.2231, "step": 6475 }, { "epoch": 0.5169633591442484, "grad_norm": 0.26677471662696994, "learning_rate": 9.93665664941802e-06, "loss": 0.1751, "step": 6476 }, { "epoch": 0.5170431867166919, "grad_norm": 0.26771991908847315, "learning_rate": 9.934071243262876e-06, "loss": 0.1648, "step": 6477 }, { "epoch": 0.5171230142891354, "grad_norm": 0.2917867206377129, "learning_rate": 9.931485841514806e-06, "loss": 0.1913, "step": 6478 }, { "epoch": 0.5172028418615789, "grad_norm": 0.3124178870630475, "learning_rate": 9.928900444346636e-06, "loss": 0.1743, "step": 6479 }, { "epoch": 0.5172826694340225, "grad_norm": 0.28220161706881475, "learning_rate": 9.92631505193119e-06, "loss": 0.122, "step": 6480 }, { "epoch": 0.5173624970064661, "grad_norm": 0.30065068509412235, "learning_rate": 9.92372966444129e-06, "loss": 0.1811, "step": 6481 }, { "epoch": 0.5174423245789096, "grad_norm": 0.3098550564058076, "learning_rate": 9.921144282049762e-06, "loss": 0.2155, "step": 6482 }, { "epoch": 0.5175221521513531, "grad_norm": 0.2747049450197924, "learning_rate": 9.918558904929423e-06, "loss": 0.1814, "step": 6483 }, { "epoch": 0.5176019797237966, "grad_norm": 0.28364704531564616, "learning_rate": 9.915973533253104e-06, "loss": 0.1605, "step": 6484 }, { "epoch": 0.5176818072962401, "grad_norm": 0.2671871504983838, "learning_rate": 9.913388167193615e-06, "loss": 0.1747, "step": 6485 }, { "epoch": 0.5177616348686837, "grad_norm": 0.3165359959018013, "learning_rate": 9.910802806923791e-06, "loss": 0.1919, "step": 6486 }, { "epoch": 0.5178414624411272, "grad_norm": 0.31467734894534155, "learning_rate": 9.908217452616447e-06, "loss": 0.1682, "step": 6487 }, { "epoch": 0.5179212900135707, "grad_norm": 0.2743415130490446, "learning_rate": 9.905632104444398e-06, "loss": 0.2196, "step": 6488 }, { "epoch": 0.5180011175860142, "grad_norm": 0.2500724379772735, "learning_rate": 9.903046762580473e-06, "loss": 0.167, "step": 6489 }, { "epoch": 0.5180809451584577, "grad_norm": 0.270233233168845, "learning_rate": 9.900461427197483e-06, "loss": 0.1206, "step": 6490 }, { "epoch": 0.5181607727309012, "grad_norm": 0.2912726083548871, "learning_rate": 9.89787609846826e-06, "loss": 0.1429, "step": 6491 }, { "epoch": 0.5182406003033447, "grad_norm": 0.3111007656894265, "learning_rate": 9.89529077656561e-06, "loss": 0.1932, "step": 6492 }, { "epoch": 0.5183204278757882, "grad_norm": 0.30077202422293514, "learning_rate": 9.89270546166236e-06, "loss": 0.1655, "step": 6493 }, { "epoch": 0.5184002554482319, "grad_norm": 0.28385686765968315, "learning_rate": 9.890120153931325e-06, "loss": 0.1787, "step": 6494 }, { "epoch": 0.5184800830206754, "grad_norm": 0.2895231240865745, "learning_rate": 9.88753485354532e-06, "loss": 0.1862, "step": 6495 }, { "epoch": 0.5185599105931189, "grad_norm": 0.3183170808374707, "learning_rate": 9.88494956067717e-06, "loss": 0.1561, "step": 6496 }, { "epoch": 0.5186397381655624, "grad_norm": 0.24570603347012637, "learning_rate": 9.882364275499682e-06, "loss": 0.1788, "step": 6497 }, { "epoch": 0.5187195657380059, "grad_norm": 0.2797085139747658, "learning_rate": 9.87977899818568e-06, "loss": 0.1357, "step": 6498 }, { "epoch": 0.5187993933104494, "grad_norm": 0.26895958199492526, "learning_rate": 9.877193728907974e-06, "loss": 0.16, "step": 6499 }, { "epoch": 0.518879220882893, "grad_norm": 0.2804147685318904, "learning_rate": 9.874608467839382e-06, "loss": 0.1653, "step": 6500 }, { "epoch": 0.5189590484553365, "grad_norm": 0.2740716215695734, "learning_rate": 9.872023215152719e-06, "loss": 0.2046, "step": 6501 }, { "epoch": 0.51903887602778, "grad_norm": 0.25515661785479493, "learning_rate": 9.869437971020795e-06, "loss": 0.1342, "step": 6502 }, { "epoch": 0.5191187036002235, "grad_norm": 0.30166702600935524, "learning_rate": 9.866852735616428e-06, "loss": 0.1365, "step": 6503 }, { "epoch": 0.519198531172667, "grad_norm": 0.2976785074410713, "learning_rate": 9.864267509112427e-06, "loss": 0.1574, "step": 6504 }, { "epoch": 0.5192783587451105, "grad_norm": 0.31896463864852787, "learning_rate": 9.861682291681608e-06, "loss": 0.1631, "step": 6505 }, { "epoch": 0.519358186317554, "grad_norm": 0.26587484967445707, "learning_rate": 9.859097083496776e-06, "loss": 0.1463, "step": 6506 }, { "epoch": 0.5194380138899976, "grad_norm": 0.30905922685906256, "learning_rate": 9.856511884730751e-06, "loss": 0.1585, "step": 6507 }, { "epoch": 0.5195178414624412, "grad_norm": 0.30595291790763546, "learning_rate": 9.853926695556341e-06, "loss": 0.172, "step": 6508 }, { "epoch": 0.5195976690348847, "grad_norm": 0.2488494652055279, "learning_rate": 9.851341516146347e-06, "loss": 0.1518, "step": 6509 }, { "epoch": 0.5196774966073282, "grad_norm": 0.3300087915920866, "learning_rate": 9.84875634667359e-06, "loss": 0.1321, "step": 6510 }, { "epoch": 0.5197573241797717, "grad_norm": 0.3398805464879875, "learning_rate": 9.846171187310865e-06, "loss": 0.1622, "step": 6511 }, { "epoch": 0.5198371517522152, "grad_norm": 0.2787058895521535, "learning_rate": 9.843586038230993e-06, "loss": 0.2248, "step": 6512 }, { "epoch": 0.5199169793246587, "grad_norm": 0.3083886656943166, "learning_rate": 9.841000899606772e-06, "loss": 0.1389, "step": 6513 }, { "epoch": 0.5199968068971023, "grad_norm": 0.29890608037852434, "learning_rate": 9.838415771611016e-06, "loss": 0.187, "step": 6514 }, { "epoch": 0.5200766344695458, "grad_norm": 0.3785098694610157, "learning_rate": 9.835830654416527e-06, "loss": 0.1358, "step": 6515 }, { "epoch": 0.5201564620419893, "grad_norm": 0.45528056967280844, "learning_rate": 9.833245548196102e-06, "loss": 0.1434, "step": 6516 }, { "epoch": 0.5202362896144328, "grad_norm": 0.306924451731955, "learning_rate": 9.83066045312256e-06, "loss": 0.1626, "step": 6517 }, { "epoch": 0.5203161171868763, "grad_norm": 0.2646736529351586, "learning_rate": 9.82807536936869e-06, "loss": 0.1551, "step": 6518 }, { "epoch": 0.5203959447593198, "grad_norm": 0.37795157303473237, "learning_rate": 9.825490297107308e-06, "loss": 0.1829, "step": 6519 }, { "epoch": 0.5204757723317633, "grad_norm": 0.31336452427660944, "learning_rate": 9.822905236511202e-06, "loss": 0.1404, "step": 6520 }, { "epoch": 0.520555599904207, "grad_norm": 0.3388263288444242, "learning_rate": 9.820320187753185e-06, "loss": 0.1746, "step": 6521 }, { "epoch": 0.5206354274766505, "grad_norm": 0.2629968314898845, "learning_rate": 9.817735151006049e-06, "loss": 0.1624, "step": 6522 }, { "epoch": 0.520715255049094, "grad_norm": 0.3069239872161335, "learning_rate": 9.8151501264426e-06, "loss": 0.1696, "step": 6523 }, { "epoch": 0.5207950826215375, "grad_norm": 0.2835068683722035, "learning_rate": 9.812565114235635e-06, "loss": 0.1541, "step": 6524 }, { "epoch": 0.520874910193981, "grad_norm": 0.23916426360380277, "learning_rate": 9.809980114557945e-06, "loss": 0.1509, "step": 6525 }, { "epoch": 0.5209547377664245, "grad_norm": 0.30218375467200825, "learning_rate": 9.807395127582335e-06, "loss": 0.1888, "step": 6526 }, { "epoch": 0.521034565338868, "grad_norm": 0.27356231708625117, "learning_rate": 9.804810153481598e-06, "loss": 0.1805, "step": 6527 }, { "epoch": 0.5211143929113116, "grad_norm": 0.26078743838739943, "learning_rate": 9.80222519242853e-06, "loss": 0.1544, "step": 6528 }, { "epoch": 0.5211942204837551, "grad_norm": 0.272016177726165, "learning_rate": 9.799640244595923e-06, "loss": 0.1517, "step": 6529 }, { "epoch": 0.5212740480561986, "grad_norm": 0.2898828505783766, "learning_rate": 9.797055310156577e-06, "loss": 0.1538, "step": 6530 }, { "epoch": 0.5213538756286421, "grad_norm": 0.35906312816717467, "learning_rate": 9.79447038928328e-06, "loss": 0.1633, "step": 6531 }, { "epoch": 0.5214337032010856, "grad_norm": 0.2969467930799478, "learning_rate": 9.79188548214882e-06, "loss": 0.1845, "step": 6532 }, { "epoch": 0.5215135307735291, "grad_norm": 0.26757617852517523, "learning_rate": 9.789300588925996e-06, "loss": 0.1747, "step": 6533 }, { "epoch": 0.5215933583459726, "grad_norm": 0.28013446043950246, "learning_rate": 9.78671570978759e-06, "loss": 0.1766, "step": 6534 }, { "epoch": 0.5216731859184163, "grad_norm": 0.32635583643582083, "learning_rate": 9.784130844906399e-06, "loss": 0.1561, "step": 6535 }, { "epoch": 0.5217530134908598, "grad_norm": 0.2822271023152687, "learning_rate": 9.781545994455202e-06, "loss": 0.1623, "step": 6536 }, { "epoch": 0.5218328410633033, "grad_norm": 0.2729372602978765, "learning_rate": 9.778961158606796e-06, "loss": 0.1403, "step": 6537 }, { "epoch": 0.5219126686357468, "grad_norm": 0.29190315269307876, "learning_rate": 9.776376337533959e-06, "loss": 0.1619, "step": 6538 }, { "epoch": 0.5219924962081903, "grad_norm": 0.2804061316234054, "learning_rate": 9.773791531409477e-06, "loss": 0.1696, "step": 6539 }, { "epoch": 0.5220723237806338, "grad_norm": 0.269645761531644, "learning_rate": 9.771206740406141e-06, "loss": 0.1651, "step": 6540 }, { "epoch": 0.5221521513530774, "grad_norm": 0.2551566029302213, "learning_rate": 9.768621964696724e-06, "loss": 0.1284, "step": 6541 }, { "epoch": 0.5222319789255209, "grad_norm": 0.3557137803350791, "learning_rate": 9.766037204454016e-06, "loss": 0.1718, "step": 6542 }, { "epoch": 0.5223118064979644, "grad_norm": 0.28525691689402843, "learning_rate": 9.763452459850793e-06, "loss": 0.1583, "step": 6543 }, { "epoch": 0.5223916340704079, "grad_norm": 0.263209000279571, "learning_rate": 9.760867731059839e-06, "loss": 0.1744, "step": 6544 }, { "epoch": 0.5224714616428514, "grad_norm": 0.3684738978507003, "learning_rate": 9.75828301825393e-06, "loss": 0.2064, "step": 6545 }, { "epoch": 0.5225512892152949, "grad_norm": 0.28040421901687285, "learning_rate": 9.755698321605843e-06, "loss": 0.1635, "step": 6546 }, { "epoch": 0.5226311167877384, "grad_norm": 0.27560840116354657, "learning_rate": 9.753113641288358e-06, "loss": 0.202, "step": 6547 }, { "epoch": 0.5227109443601821, "grad_norm": 0.32577364860537894, "learning_rate": 9.750528977474248e-06, "loss": 0.2018, "step": 6548 }, { "epoch": 0.5227907719326256, "grad_norm": 0.27165140903587387, "learning_rate": 9.74794433033629e-06, "loss": 0.1684, "step": 6549 }, { "epoch": 0.5228705995050691, "grad_norm": 0.2758190149639897, "learning_rate": 9.745359700047255e-06, "loss": 0.1587, "step": 6550 }, { "epoch": 0.5229504270775126, "grad_norm": 0.3284192489468135, "learning_rate": 9.742775086779917e-06, "loss": 0.1705, "step": 6551 }, { "epoch": 0.5230302546499561, "grad_norm": 0.3087018633780213, "learning_rate": 9.740190490707045e-06, "loss": 0.1767, "step": 6552 }, { "epoch": 0.5231100822223996, "grad_norm": 0.27180389634588475, "learning_rate": 9.737605912001413e-06, "loss": 0.1395, "step": 6553 }, { "epoch": 0.5231899097948431, "grad_norm": 0.3029435069751364, "learning_rate": 9.73502135083579e-06, "loss": 0.1497, "step": 6554 }, { "epoch": 0.5232697373672867, "grad_norm": 0.28364770663726074, "learning_rate": 9.732436807382934e-06, "loss": 0.2012, "step": 6555 }, { "epoch": 0.5233495649397302, "grad_norm": 0.3133722536115793, "learning_rate": 9.729852281815626e-06, "loss": 0.156, "step": 6556 }, { "epoch": 0.5234293925121737, "grad_norm": 0.31139154988938067, "learning_rate": 9.727267774306614e-06, "loss": 0.1284, "step": 6557 }, { "epoch": 0.5235092200846172, "grad_norm": 0.31637564785206956, "learning_rate": 9.72468328502868e-06, "loss": 0.2111, "step": 6558 }, { "epoch": 0.5235890476570607, "grad_norm": 0.2621109668811609, "learning_rate": 9.722098814154575e-06, "loss": 0.187, "step": 6559 }, { "epoch": 0.5236688752295042, "grad_norm": 0.2960648321842189, "learning_rate": 9.719514361857067e-06, "loss": 0.1349, "step": 6560 }, { "epoch": 0.5237487028019477, "grad_norm": 0.3362068651601594, "learning_rate": 9.716929928308916e-06, "loss": 0.1891, "step": 6561 }, { "epoch": 0.5238285303743914, "grad_norm": 0.2952705025414377, "learning_rate": 9.714345513682873e-06, "loss": 0.1782, "step": 6562 }, { "epoch": 0.5239083579468349, "grad_norm": 0.27644358100872396, "learning_rate": 9.711761118151708e-06, "loss": 0.1357, "step": 6563 }, { "epoch": 0.5239881855192784, "grad_norm": 0.34663246785051416, "learning_rate": 9.709176741888164e-06, "loss": 0.2005, "step": 6564 }, { "epoch": 0.5240680130917219, "grad_norm": 0.28465329046427906, "learning_rate": 9.706592385065012e-06, "loss": 0.1656, "step": 6565 }, { "epoch": 0.5241478406641654, "grad_norm": 0.2503853514845605, "learning_rate": 9.70400804785499e-06, "loss": 0.1399, "step": 6566 }, { "epoch": 0.5242276682366089, "grad_norm": 0.278149837644806, "learning_rate": 9.701423730430867e-06, "loss": 0.1517, "step": 6567 }, { "epoch": 0.5243074958090524, "grad_norm": 0.26266543873019427, "learning_rate": 9.698839432965383e-06, "loss": 0.1404, "step": 6568 }, { "epoch": 0.524387323381496, "grad_norm": 0.30041430121019636, "learning_rate": 9.696255155631288e-06, "loss": 0.2028, "step": 6569 }, { "epoch": 0.5244671509539395, "grad_norm": 0.2930182990165793, "learning_rate": 9.693670898601338e-06, "loss": 0.1503, "step": 6570 }, { "epoch": 0.524546978526383, "grad_norm": 0.2594746224638418, "learning_rate": 9.691086662048273e-06, "loss": 0.2055, "step": 6571 }, { "epoch": 0.5246268060988265, "grad_norm": 0.30188882510339143, "learning_rate": 9.688502446144843e-06, "loss": 0.1641, "step": 6572 }, { "epoch": 0.52470663367127, "grad_norm": 0.33921293152385534, "learning_rate": 9.68591825106379e-06, "loss": 0.1676, "step": 6573 }, { "epoch": 0.5247864612437135, "grad_norm": 0.29080573411863103, "learning_rate": 9.683334076977864e-06, "loss": 0.1532, "step": 6574 }, { "epoch": 0.5248662888161572, "grad_norm": 0.24594164074511685, "learning_rate": 9.680749924059799e-06, "loss": 0.1943, "step": 6575 }, { "epoch": 0.5249461163886007, "grad_norm": 0.320697230152777, "learning_rate": 9.678165792482336e-06, "loss": 0.1875, "step": 6576 }, { "epoch": 0.5250259439610442, "grad_norm": 0.28661275331117303, "learning_rate": 9.675581682418219e-06, "loss": 0.1766, "step": 6577 }, { "epoch": 0.5251057715334877, "grad_norm": 0.28886562419955714, "learning_rate": 9.67299759404018e-06, "loss": 0.1705, "step": 6578 }, { "epoch": 0.5251855991059312, "grad_norm": 0.2961857145725754, "learning_rate": 9.670413527520961e-06, "loss": 0.1614, "step": 6579 }, { "epoch": 0.5252654266783747, "grad_norm": 0.2849344252937496, "learning_rate": 9.66782948303329e-06, "loss": 0.1491, "step": 6580 }, { "epoch": 0.5253452542508182, "grad_norm": 0.35598653718728657, "learning_rate": 9.665245460749907e-06, "loss": 0.1638, "step": 6581 }, { "epoch": 0.5254250818232618, "grad_norm": 0.3470973683217886, "learning_rate": 9.662661460843538e-06, "loss": 0.1453, "step": 6582 }, { "epoch": 0.5255049093957053, "grad_norm": 0.32556871192384235, "learning_rate": 9.660077483486914e-06, "loss": 0.1983, "step": 6583 }, { "epoch": 0.5255847369681488, "grad_norm": 0.32650075074108337, "learning_rate": 9.657493528852769e-06, "loss": 0.1892, "step": 6584 }, { "epoch": 0.5256645645405923, "grad_norm": 0.3847426463730089, "learning_rate": 9.654909597113822e-06, "loss": 0.2098, "step": 6585 }, { "epoch": 0.5257443921130358, "grad_norm": 0.3019221440650363, "learning_rate": 9.652325688442807e-06, "loss": 0.1389, "step": 6586 }, { "epoch": 0.5258242196854793, "grad_norm": 0.23296829158291912, "learning_rate": 9.64974180301244e-06, "loss": 0.1629, "step": 6587 }, { "epoch": 0.5259040472579228, "grad_norm": 0.3456073090542977, "learning_rate": 9.64715794099545e-06, "loss": 0.1762, "step": 6588 }, { "epoch": 0.5259838748303665, "grad_norm": 0.358035520723682, "learning_rate": 9.644574102564553e-06, "loss": 0.1676, "step": 6589 }, { "epoch": 0.52606370240281, "grad_norm": 0.25582331839597355, "learning_rate": 9.641990287892471e-06, "loss": 0.1864, "step": 6590 }, { "epoch": 0.5261435299752535, "grad_norm": 0.3220436699088228, "learning_rate": 9.639406497151925e-06, "loss": 0.1533, "step": 6591 }, { "epoch": 0.526223357547697, "grad_norm": 0.3207053420685331, "learning_rate": 9.636822730515624e-06, "loss": 0.1584, "step": 6592 }, { "epoch": 0.5263031851201405, "grad_norm": 0.32852556287754076, "learning_rate": 9.634238988156287e-06, "loss": 0.1584, "step": 6593 }, { "epoch": 0.526383012692584, "grad_norm": 0.2729534256260141, "learning_rate": 9.631655270246626e-06, "loss": 0.14, "step": 6594 }, { "epoch": 0.5264628402650275, "grad_norm": 0.27172706489752124, "learning_rate": 9.629071576959353e-06, "loss": 0.1332, "step": 6595 }, { "epoch": 0.5265426678374711, "grad_norm": 0.33969659303506644, "learning_rate": 9.626487908467177e-06, "loss": 0.1615, "step": 6596 }, { "epoch": 0.5266224954099146, "grad_norm": 0.32909895235388625, "learning_rate": 9.623904264942807e-06, "loss": 0.1505, "step": 6597 }, { "epoch": 0.5267023229823581, "grad_norm": 0.28088928765961935, "learning_rate": 9.62132064655895e-06, "loss": 0.1475, "step": 6598 }, { "epoch": 0.5267821505548016, "grad_norm": 0.22820096817713578, "learning_rate": 9.618737053488304e-06, "loss": 0.128, "step": 6599 }, { "epoch": 0.5268619781272451, "grad_norm": 0.3325137016896052, "learning_rate": 9.616153485903583e-06, "loss": 0.1544, "step": 6600 }, { "epoch": 0.5269418056996886, "grad_norm": 0.30608865659801954, "learning_rate": 9.613569943977477e-06, "loss": 0.1301, "step": 6601 }, { "epoch": 0.5270216332721321, "grad_norm": 0.30873474504664317, "learning_rate": 9.610986427882696e-06, "loss": 0.188, "step": 6602 }, { "epoch": 0.5271014608445758, "grad_norm": 0.2664185557779021, "learning_rate": 9.608402937791926e-06, "loss": 0.1955, "step": 6603 }, { "epoch": 0.5271812884170193, "grad_norm": 0.32441309787255773, "learning_rate": 9.605819473877875e-06, "loss": 0.1736, "step": 6604 }, { "epoch": 0.5272611159894628, "grad_norm": 0.30578374558390264, "learning_rate": 9.603236036313234e-06, "loss": 0.2402, "step": 6605 }, { "epoch": 0.5273409435619063, "grad_norm": 0.3077184434943824, "learning_rate": 9.600652625270689e-06, "loss": 0.1637, "step": 6606 }, { "epoch": 0.5274207711343498, "grad_norm": 0.32696879867706385, "learning_rate": 9.598069240922942e-06, "loss": 0.1937, "step": 6607 }, { "epoch": 0.5275005987067933, "grad_norm": 0.29686747748965286, "learning_rate": 9.595485883442667e-06, "loss": 0.1498, "step": 6608 }, { "epoch": 0.5275804262792368, "grad_norm": 0.32320534111035354, "learning_rate": 9.592902553002568e-06, "loss": 0.1515, "step": 6609 }, { "epoch": 0.5276602538516804, "grad_norm": 0.41914362600489663, "learning_rate": 9.590319249775316e-06, "loss": 0.1761, "step": 6610 }, { "epoch": 0.5277400814241239, "grad_norm": 0.2582131290762042, "learning_rate": 9.587735973933604e-06, "loss": 0.1369, "step": 6611 }, { "epoch": 0.5278199089965674, "grad_norm": 0.3155877133699178, "learning_rate": 9.585152725650112e-06, "loss": 0.1949, "step": 6612 }, { "epoch": 0.5278997365690109, "grad_norm": 0.25351497838409254, "learning_rate": 9.582569505097516e-06, "loss": 0.1792, "step": 6613 }, { "epoch": 0.5279795641414544, "grad_norm": 0.2556270133020711, "learning_rate": 9.579986312448499e-06, "loss": 0.1496, "step": 6614 }, { "epoch": 0.5280593917138979, "grad_norm": 0.3048212814223406, "learning_rate": 9.577403147875731e-06, "loss": 0.1535, "step": 6615 }, { "epoch": 0.5281392192863416, "grad_norm": 0.3094974880375037, "learning_rate": 9.574820011551893e-06, "loss": 0.118, "step": 6616 }, { "epoch": 0.5282190468587851, "grad_norm": 0.2972886962298302, "learning_rate": 9.572236903649652e-06, "loss": 0.1647, "step": 6617 }, { "epoch": 0.5282988744312286, "grad_norm": 0.2649611282757212, "learning_rate": 9.569653824341684e-06, "loss": 0.1685, "step": 6618 }, { "epoch": 0.5283787020036721, "grad_norm": 0.3289303070761181, "learning_rate": 9.567070773800654e-06, "loss": 0.2376, "step": 6619 }, { "epoch": 0.5284585295761156, "grad_norm": 0.26300585926467185, "learning_rate": 9.564487752199229e-06, "loss": 0.1902, "step": 6620 }, { "epoch": 0.5285383571485591, "grad_norm": 0.30249629547969775, "learning_rate": 9.561904759710076e-06, "loss": 0.1629, "step": 6621 }, { "epoch": 0.5286181847210026, "grad_norm": 0.33655019371244715, "learning_rate": 9.559321796505854e-06, "loss": 0.2146, "step": 6622 }, { "epoch": 0.5286980122934462, "grad_norm": 0.2649236816116807, "learning_rate": 9.556738862759227e-06, "loss": 0.1376, "step": 6623 }, { "epoch": 0.5287778398658897, "grad_norm": 0.2559119616697654, "learning_rate": 9.554155958642851e-06, "loss": 0.166, "step": 6624 }, { "epoch": 0.5288576674383332, "grad_norm": 0.2925169588249997, "learning_rate": 9.551573084329387e-06, "loss": 0.1488, "step": 6625 }, { "epoch": 0.5289374950107767, "grad_norm": 0.3031813957595212, "learning_rate": 9.548990239991487e-06, "loss": 0.1603, "step": 6626 }, { "epoch": 0.5290173225832202, "grad_norm": 0.30587508208104025, "learning_rate": 9.546407425801804e-06, "loss": 0.1711, "step": 6627 }, { "epoch": 0.5290971501556637, "grad_norm": 0.29613512051201096, "learning_rate": 9.543824641932993e-06, "loss": 0.2282, "step": 6628 }, { "epoch": 0.5291769777281072, "grad_norm": 0.295995894107861, "learning_rate": 9.541241888557694e-06, "loss": 0.1305, "step": 6629 }, { "epoch": 0.5292568053005509, "grad_norm": 0.256368441607277, "learning_rate": 9.538659165848563e-06, "loss": 0.1513, "step": 6630 }, { "epoch": 0.5293366328729944, "grad_norm": 0.27592857263380743, "learning_rate": 9.536076473978239e-06, "loss": 0.116, "step": 6631 }, { "epoch": 0.5294164604454379, "grad_norm": 0.30598843036462414, "learning_rate": 9.533493813119368e-06, "loss": 0.1724, "step": 6632 }, { "epoch": 0.5294962880178814, "grad_norm": 0.2893589388947021, "learning_rate": 9.530911183444587e-06, "loss": 0.1718, "step": 6633 }, { "epoch": 0.5295761155903249, "grad_norm": 0.2361338975346305, "learning_rate": 9.528328585126541e-06, "loss": 0.1897, "step": 6634 }, { "epoch": 0.5296559431627684, "grad_norm": 0.31113661493924916, "learning_rate": 9.525746018337862e-06, "loss": 0.1791, "step": 6635 }, { "epoch": 0.5297357707352119, "grad_norm": 0.2846723078708819, "learning_rate": 9.523163483251181e-06, "loss": 0.1552, "step": 6636 }, { "epoch": 0.5298155983076555, "grad_norm": 0.26287196970497456, "learning_rate": 9.520580980039138e-06, "loss": 0.1577, "step": 6637 }, { "epoch": 0.529895425880099, "grad_norm": 0.29501518558405593, "learning_rate": 9.517998508874356e-06, "loss": 0.1779, "step": 6638 }, { "epoch": 0.5299752534525425, "grad_norm": 0.3137005824674491, "learning_rate": 9.515416069929469e-06, "loss": 0.1927, "step": 6639 }, { "epoch": 0.530055081024986, "grad_norm": 0.26672263855454215, "learning_rate": 9.512833663377099e-06, "loss": 0.1465, "step": 6640 }, { "epoch": 0.5301349085974295, "grad_norm": 0.2577228300872788, "learning_rate": 9.51025128938987e-06, "loss": 0.173, "step": 6641 }, { "epoch": 0.530214736169873, "grad_norm": 0.36053842830173377, "learning_rate": 9.507668948140408e-06, "loss": 0.201, "step": 6642 }, { "epoch": 0.5302945637423166, "grad_norm": 0.3175083525622903, "learning_rate": 9.505086639801322e-06, "loss": 0.1744, "step": 6643 }, { "epoch": 0.5303743913147602, "grad_norm": 0.26847365980637283, "learning_rate": 9.502504364545242e-06, "loss": 0.1871, "step": 6644 }, { "epoch": 0.5304542188872037, "grad_norm": 0.3079863245660254, "learning_rate": 9.49992212254477e-06, "loss": 0.168, "step": 6645 }, { "epoch": 0.5305340464596472, "grad_norm": 0.32303544957280966, "learning_rate": 9.497339913972531e-06, "loss": 0.1638, "step": 6646 }, { "epoch": 0.5306138740320907, "grad_norm": 0.2801827540916302, "learning_rate": 9.494757739001123e-06, "loss": 0.1549, "step": 6647 }, { "epoch": 0.5306937016045342, "grad_norm": 0.3590370044649828, "learning_rate": 9.492175597803167e-06, "loss": 0.1771, "step": 6648 }, { "epoch": 0.5307735291769777, "grad_norm": 0.28321773509566384, "learning_rate": 9.489593490551261e-06, "loss": 0.173, "step": 6649 }, { "epoch": 0.5308533567494212, "grad_norm": 0.27577473275973785, "learning_rate": 9.48701141741801e-06, "loss": 0.1729, "step": 6650 }, { "epoch": 0.5309331843218648, "grad_norm": 0.37391470607628696, "learning_rate": 9.484429378576015e-06, "loss": 0.105, "step": 6651 }, { "epoch": 0.5310130118943083, "grad_norm": 0.3679767575478076, "learning_rate": 9.481847374197872e-06, "loss": 0.1559, "step": 6652 }, { "epoch": 0.5310928394667518, "grad_norm": 0.3094713610294327, "learning_rate": 9.479265404456188e-06, "loss": 0.1551, "step": 6653 }, { "epoch": 0.5311726670391953, "grad_norm": 0.40145109161140197, "learning_rate": 9.476683469523545e-06, "loss": 0.1659, "step": 6654 }, { "epoch": 0.5312524946116388, "grad_norm": 0.29832449239128705, "learning_rate": 9.474101569572547e-06, "loss": 0.1705, "step": 6655 }, { "epoch": 0.5313323221840823, "grad_norm": 0.2519804139729023, "learning_rate": 9.471519704775772e-06, "loss": 0.1869, "step": 6656 }, { "epoch": 0.531412149756526, "grad_norm": 0.2874813632126144, "learning_rate": 9.46893787530582e-06, "loss": 0.1341, "step": 6657 }, { "epoch": 0.5314919773289695, "grad_norm": 0.3173885459831723, "learning_rate": 9.466356081335266e-06, "loss": 0.18, "step": 6658 }, { "epoch": 0.531571804901413, "grad_norm": 0.2600850058700771, "learning_rate": 9.463774323036695e-06, "loss": 0.1822, "step": 6659 }, { "epoch": 0.5316516324738565, "grad_norm": 0.29806595875807024, "learning_rate": 9.46119260058269e-06, "loss": 0.1879, "step": 6660 }, { "epoch": 0.5317314600463, "grad_norm": 0.3335086645584974, "learning_rate": 9.458610914145826e-06, "loss": 0.1998, "step": 6661 }, { "epoch": 0.5318112876187435, "grad_norm": 0.2849919820453783, "learning_rate": 9.456029263898681e-06, "loss": 0.1842, "step": 6662 }, { "epoch": 0.531891115191187, "grad_norm": 0.33620584618626154, "learning_rate": 9.453447650013826e-06, "loss": 0.1924, "step": 6663 }, { "epoch": 0.5319709427636305, "grad_norm": 0.34276537311811583, "learning_rate": 9.450866072663834e-06, "loss": 0.1453, "step": 6664 }, { "epoch": 0.5320507703360741, "grad_norm": 0.30521680482091057, "learning_rate": 9.448284532021272e-06, "loss": 0.1433, "step": 6665 }, { "epoch": 0.5321305979085176, "grad_norm": 0.27430278499710575, "learning_rate": 9.445703028258703e-06, "loss": 0.1691, "step": 6666 }, { "epoch": 0.5322104254809611, "grad_norm": 0.313445044369516, "learning_rate": 9.443121561548695e-06, "loss": 0.1772, "step": 6667 }, { "epoch": 0.5322902530534046, "grad_norm": 0.31666277205336985, "learning_rate": 9.440540132063807e-06, "loss": 0.1437, "step": 6668 }, { "epoch": 0.5323700806258481, "grad_norm": 0.26624445260075513, "learning_rate": 9.437958739976597e-06, "loss": 0.1954, "step": 6669 }, { "epoch": 0.5324499081982917, "grad_norm": 0.24406611070169418, "learning_rate": 9.43537738545962e-06, "loss": 0.1647, "step": 6670 }, { "epoch": 0.5325297357707353, "grad_norm": 0.3057285656515492, "learning_rate": 9.432796068685432e-06, "loss": 0.2145, "step": 6671 }, { "epoch": 0.5326095633431788, "grad_norm": 0.3058646996203075, "learning_rate": 9.430214789826581e-06, "loss": 0.1464, "step": 6672 }, { "epoch": 0.5326893909156223, "grad_norm": 0.3047280019805207, "learning_rate": 9.427633549055616e-06, "loss": 0.1707, "step": 6673 }, { "epoch": 0.5327692184880658, "grad_norm": 0.2710739686816769, "learning_rate": 9.425052346545085e-06, "loss": 0.138, "step": 6674 }, { "epoch": 0.5328490460605093, "grad_norm": 0.3073079284797041, "learning_rate": 9.422471182467526e-06, "loss": 0.2053, "step": 6675 }, { "epoch": 0.5329288736329528, "grad_norm": 0.31605547737626394, "learning_rate": 9.419890056995487e-06, "loss": 0.1521, "step": 6676 }, { "epoch": 0.5330087012053963, "grad_norm": 0.30860129940177367, "learning_rate": 9.417308970301499e-06, "loss": 0.1836, "step": 6677 }, { "epoch": 0.5330885287778399, "grad_norm": 0.3272662186760193, "learning_rate": 9.414727922558102e-06, "loss": 0.1593, "step": 6678 }, { "epoch": 0.5331683563502834, "grad_norm": 0.2988559988255785, "learning_rate": 9.412146913937827e-06, "loss": 0.1724, "step": 6679 }, { "epoch": 0.5332481839227269, "grad_norm": 0.2548988468862832, "learning_rate": 9.409565944613203e-06, "loss": 0.1783, "step": 6680 }, { "epoch": 0.5333280114951704, "grad_norm": 0.2446664577921652, "learning_rate": 9.406985014756761e-06, "loss": 0.1564, "step": 6681 }, { "epoch": 0.5334078390676139, "grad_norm": 0.2923026361371228, "learning_rate": 9.404404124541021e-06, "loss": 0.1221, "step": 6682 }, { "epoch": 0.5334876666400574, "grad_norm": 0.2998233847893707, "learning_rate": 9.40182327413851e-06, "loss": 0.1853, "step": 6683 }, { "epoch": 0.533567494212501, "grad_norm": 0.23769041271197475, "learning_rate": 9.399242463721746e-06, "loss": 0.1748, "step": 6684 }, { "epoch": 0.5336473217849446, "grad_norm": 0.3215425907885044, "learning_rate": 9.396661693463247e-06, "loss": 0.1887, "step": 6685 }, { "epoch": 0.5337271493573881, "grad_norm": 0.27638437352474193, "learning_rate": 9.394080963535523e-06, "loss": 0.1315, "step": 6686 }, { "epoch": 0.5338069769298316, "grad_norm": 0.26430701061431877, "learning_rate": 9.391500274111093e-06, "loss": 0.1709, "step": 6687 }, { "epoch": 0.5338868045022751, "grad_norm": 0.2593581539383212, "learning_rate": 9.388919625362461e-06, "loss": 0.1528, "step": 6688 }, { "epoch": 0.5339666320747186, "grad_norm": 0.3524260323396796, "learning_rate": 9.38633901746213e-06, "loss": 0.1698, "step": 6689 }, { "epoch": 0.5340464596471621, "grad_norm": 0.29341588114960776, "learning_rate": 9.383758450582612e-06, "loss": 0.1955, "step": 6690 }, { "epoch": 0.5341262872196056, "grad_norm": 0.3071758927979034, "learning_rate": 9.381177924896396e-06, "loss": 0.1437, "step": 6691 }, { "epoch": 0.5342061147920492, "grad_norm": 0.2491920096885741, "learning_rate": 9.378597440575995e-06, "loss": 0.1691, "step": 6692 }, { "epoch": 0.5342859423644927, "grad_norm": 0.2856211794718231, "learning_rate": 9.376016997793887e-06, "loss": 0.2013, "step": 6693 }, { "epoch": 0.5343657699369362, "grad_norm": 0.28219570934847954, "learning_rate": 9.373436596722583e-06, "loss": 0.1655, "step": 6694 }, { "epoch": 0.5344455975093797, "grad_norm": 0.30905440776406445, "learning_rate": 9.370856237534557e-06, "loss": 0.1482, "step": 6695 }, { "epoch": 0.5345254250818232, "grad_norm": 0.26368088377525845, "learning_rate": 9.368275920402304e-06, "loss": 0.1686, "step": 6696 }, { "epoch": 0.5346052526542668, "grad_norm": 0.3241105225154925, "learning_rate": 9.365695645498304e-06, "loss": 0.1665, "step": 6697 }, { "epoch": 0.5346850802267104, "grad_norm": 0.30577762342454107, "learning_rate": 9.363115412995038e-06, "loss": 0.1513, "step": 6698 }, { "epoch": 0.5347649077991539, "grad_norm": 0.2845675258619845, "learning_rate": 9.36053522306499e-06, "loss": 0.1669, "step": 6699 }, { "epoch": 0.5348447353715974, "grad_norm": 0.2789996921625957, "learning_rate": 9.357955075880628e-06, "loss": 0.1536, "step": 6700 }, { "epoch": 0.5349245629440409, "grad_norm": 0.2664178574330297, "learning_rate": 9.355374971614434e-06, "loss": 0.1564, "step": 6701 }, { "epoch": 0.5350043905164844, "grad_norm": 0.2960259759067295, "learning_rate": 9.352794910438869e-06, "loss": 0.1788, "step": 6702 }, { "epoch": 0.5350842180889279, "grad_norm": 0.3132590429409541, "learning_rate": 9.350214892526401e-06, "loss": 0.1995, "step": 6703 }, { "epoch": 0.5351640456613714, "grad_norm": 0.2840578649414546, "learning_rate": 9.347634918049499e-06, "loss": 0.1555, "step": 6704 }, { "epoch": 0.535243873233815, "grad_norm": 0.279622646707745, "learning_rate": 9.345054987180619e-06, "loss": 0.1525, "step": 6705 }, { "epoch": 0.5353237008062585, "grad_norm": 0.3221668219913624, "learning_rate": 9.342475100092224e-06, "loss": 0.1547, "step": 6706 }, { "epoch": 0.535403528378702, "grad_norm": 0.23106344819278937, "learning_rate": 9.339895256956762e-06, "loss": 0.164, "step": 6707 }, { "epoch": 0.5354833559511455, "grad_norm": 0.26335436933621703, "learning_rate": 9.337315457946693e-06, "loss": 0.1433, "step": 6708 }, { "epoch": 0.535563183523589, "grad_norm": 0.3040670860401588, "learning_rate": 9.334735703234464e-06, "loss": 0.1816, "step": 6709 }, { "epoch": 0.5356430110960325, "grad_norm": 0.25346078878546785, "learning_rate": 9.332155992992517e-06, "loss": 0.1893, "step": 6710 }, { "epoch": 0.5357228386684761, "grad_norm": 0.30023848798842373, "learning_rate": 9.329576327393302e-06, "loss": 0.1623, "step": 6711 }, { "epoch": 0.5358026662409197, "grad_norm": 0.27225162393743535, "learning_rate": 9.326996706609253e-06, "loss": 0.1402, "step": 6712 }, { "epoch": 0.5358824938133632, "grad_norm": 0.26353166488673574, "learning_rate": 9.324417130812814e-06, "loss": 0.1537, "step": 6713 }, { "epoch": 0.5359623213858067, "grad_norm": 0.36577293912945147, "learning_rate": 9.321837600176414e-06, "loss": 0.1672, "step": 6714 }, { "epoch": 0.5360421489582502, "grad_norm": 0.3414708659171359, "learning_rate": 9.31925811487249e-06, "loss": 0.1402, "step": 6715 }, { "epoch": 0.5361219765306937, "grad_norm": 0.2666507232207522, "learning_rate": 9.316678675073465e-06, "loss": 0.1258, "step": 6716 }, { "epoch": 0.5362018041031372, "grad_norm": 0.2871924537134079, "learning_rate": 9.314099280951765e-06, "loss": 0.1407, "step": 6717 }, { "epoch": 0.5362816316755807, "grad_norm": 0.2826583310987373, "learning_rate": 9.311519932679816e-06, "loss": 0.1464, "step": 6718 }, { "epoch": 0.5363614592480243, "grad_norm": 0.27269059260540696, "learning_rate": 9.308940630430032e-06, "loss": 0.1635, "step": 6719 }, { "epoch": 0.5364412868204678, "grad_norm": 0.2591009332089451, "learning_rate": 9.306361374374835e-06, "loss": 0.1566, "step": 6720 }, { "epoch": 0.5365211143929113, "grad_norm": 0.25583460058083635, "learning_rate": 9.303782164686632e-06, "loss": 0.1815, "step": 6721 }, { "epoch": 0.5366009419653548, "grad_norm": 0.26284793880678115, "learning_rate": 9.301203001537839e-06, "loss": 0.1575, "step": 6722 }, { "epoch": 0.5366807695377983, "grad_norm": 0.28122191825037296, "learning_rate": 9.298623885100858e-06, "loss": 0.1629, "step": 6723 }, { "epoch": 0.5367605971102419, "grad_norm": 0.3002185937160809, "learning_rate": 9.296044815548096e-06, "loss": 0.1736, "step": 6724 }, { "epoch": 0.5368404246826854, "grad_norm": 0.29300760533944387, "learning_rate": 9.293465793051954e-06, "loss": 0.1797, "step": 6725 }, { "epoch": 0.536920252255129, "grad_norm": 0.31071288245876216, "learning_rate": 9.290886817784823e-06, "loss": 0.1797, "step": 6726 }, { "epoch": 0.5370000798275725, "grad_norm": 0.3024199982839084, "learning_rate": 9.288307889919106e-06, "loss": 0.1449, "step": 6727 }, { "epoch": 0.537079907400016, "grad_norm": 0.3087331970496022, "learning_rate": 9.285729009627188e-06, "loss": 0.2239, "step": 6728 }, { "epoch": 0.5371597349724595, "grad_norm": 0.26157165963662415, "learning_rate": 9.28315017708146e-06, "loss": 0.1392, "step": 6729 }, { "epoch": 0.537239562544903, "grad_norm": 0.2719366622499711, "learning_rate": 9.280571392454303e-06, "loss": 0.1502, "step": 6730 }, { "epoch": 0.5373193901173465, "grad_norm": 0.25173647413773786, "learning_rate": 9.277992655918106e-06, "loss": 0.1788, "step": 6731 }, { "epoch": 0.53739921768979, "grad_norm": 0.31004887228181816, "learning_rate": 9.275413967645243e-06, "loss": 0.2253, "step": 6732 }, { "epoch": 0.5374790452622336, "grad_norm": 0.2644099114351988, "learning_rate": 9.272835327808082e-06, "loss": 0.2031, "step": 6733 }, { "epoch": 0.5375588728346771, "grad_norm": 0.2840205719354496, "learning_rate": 9.270256736579009e-06, "loss": 0.2581, "step": 6734 }, { "epoch": 0.5376387004071206, "grad_norm": 0.2817302766027956, "learning_rate": 9.26767819413038e-06, "loss": 0.1836, "step": 6735 }, { "epoch": 0.5377185279795641, "grad_norm": 0.31781269556805153, "learning_rate": 9.265099700634569e-06, "loss": 0.187, "step": 6736 }, { "epoch": 0.5377983555520076, "grad_norm": 0.3613499748888731, "learning_rate": 9.26252125626393e-06, "loss": 0.2104, "step": 6737 }, { "epoch": 0.5378781831244512, "grad_norm": 0.2592540532848434, "learning_rate": 9.259942861190833e-06, "loss": 0.1791, "step": 6738 }, { "epoch": 0.5379580106968948, "grad_norm": 0.3262087637137605, "learning_rate": 9.257364515587624e-06, "loss": 0.135, "step": 6739 }, { "epoch": 0.5380378382693383, "grad_norm": 0.29885427924353397, "learning_rate": 9.254786219626658e-06, "loss": 0.1505, "step": 6740 }, { "epoch": 0.5381176658417818, "grad_norm": 0.29915926570048307, "learning_rate": 9.252207973480286e-06, "loss": 0.1931, "step": 6741 }, { "epoch": 0.5381974934142253, "grad_norm": 0.29417608193387845, "learning_rate": 9.249629777320848e-06, "loss": 0.1511, "step": 6742 }, { "epoch": 0.5382773209866688, "grad_norm": 0.3296539358848579, "learning_rate": 9.247051631320692e-06, "loss": 0.1413, "step": 6743 }, { "epoch": 0.5383571485591123, "grad_norm": 0.3275355115575605, "learning_rate": 9.24447353565215e-06, "loss": 0.1827, "step": 6744 }, { "epoch": 0.5384369761315558, "grad_norm": 0.30539381770267643, "learning_rate": 9.24189549048757e-06, "loss": 0.1672, "step": 6745 }, { "epoch": 0.5385168037039993, "grad_norm": 0.2669167886971146, "learning_rate": 9.239317495999273e-06, "loss": 0.1701, "step": 6746 }, { "epoch": 0.5385966312764429, "grad_norm": 0.23494144579761983, "learning_rate": 9.236739552359591e-06, "loss": 0.1895, "step": 6747 }, { "epoch": 0.5386764588488864, "grad_norm": 0.30422040330744715, "learning_rate": 9.23416165974085e-06, "loss": 0.1597, "step": 6748 }, { "epoch": 0.5387562864213299, "grad_norm": 0.28658212548735196, "learning_rate": 9.23158381831537e-06, "loss": 0.1835, "step": 6749 }, { "epoch": 0.5388361139937734, "grad_norm": 0.30443738875917103, "learning_rate": 9.229006028255471e-06, "loss": 0.2011, "step": 6750 }, { "epoch": 0.5389159415662169, "grad_norm": 0.3343559713052922, "learning_rate": 9.226428289733468e-06, "loss": 0.1511, "step": 6751 }, { "epoch": 0.5389957691386605, "grad_norm": 0.2863229169937804, "learning_rate": 9.223850602921674e-06, "loss": 0.209, "step": 6752 }, { "epoch": 0.539075596711104, "grad_norm": 0.2960712689984039, "learning_rate": 9.221272967992392e-06, "loss": 0.1574, "step": 6753 }, { "epoch": 0.5391554242835476, "grad_norm": 0.28964444437602826, "learning_rate": 9.218695385117934e-06, "loss": 0.2046, "step": 6754 }, { "epoch": 0.5392352518559911, "grad_norm": 0.3045687612118766, "learning_rate": 9.216117854470597e-06, "loss": 0.1706, "step": 6755 }, { "epoch": 0.5393150794284346, "grad_norm": 0.31633110121392555, "learning_rate": 9.213540376222677e-06, "loss": 0.149, "step": 6756 }, { "epoch": 0.5393949070008781, "grad_norm": 0.3165087225138478, "learning_rate": 9.210962950546474e-06, "loss": 0.137, "step": 6757 }, { "epoch": 0.5394747345733216, "grad_norm": 0.26979908783049766, "learning_rate": 9.208385577614271e-06, "loss": 0.2059, "step": 6758 }, { "epoch": 0.5395545621457651, "grad_norm": 0.3531914958433257, "learning_rate": 9.205808257598363e-06, "loss": 0.2001, "step": 6759 }, { "epoch": 0.5396343897182087, "grad_norm": 0.33393520982513347, "learning_rate": 9.203230990671026e-06, "loss": 0.1664, "step": 6760 }, { "epoch": 0.5397142172906522, "grad_norm": 0.3055611639391023, "learning_rate": 9.200653777004549e-06, "loss": 0.1409, "step": 6761 }, { "epoch": 0.5397940448630957, "grad_norm": 0.27596898232780775, "learning_rate": 9.198076616771201e-06, "loss": 0.1528, "step": 6762 }, { "epoch": 0.5398738724355392, "grad_norm": 0.27093386386544716, "learning_rate": 9.195499510143259e-06, "loss": 0.1933, "step": 6763 }, { "epoch": 0.5399537000079827, "grad_norm": 0.28549629253638265, "learning_rate": 9.19292245729299e-06, "loss": 0.2369, "step": 6764 }, { "epoch": 0.5400335275804263, "grad_norm": 0.3118739893130751, "learning_rate": 9.19034545839266e-06, "loss": 0.1788, "step": 6765 }, { "epoch": 0.5401133551528698, "grad_norm": 0.3181227818825494, "learning_rate": 9.187768513614533e-06, "loss": 0.1744, "step": 6766 }, { "epoch": 0.5401931827253134, "grad_norm": 0.3112004265214332, "learning_rate": 9.185191623130865e-06, "loss": 0.1869, "step": 6767 }, { "epoch": 0.5402730102977569, "grad_norm": 0.2940636174329869, "learning_rate": 9.182614787113915e-06, "loss": 0.1646, "step": 6768 }, { "epoch": 0.5403528378702004, "grad_norm": 0.31421449951834945, "learning_rate": 9.180038005735933e-06, "loss": 0.1294, "step": 6769 }, { "epoch": 0.5404326654426439, "grad_norm": 0.3215139758143995, "learning_rate": 9.17746127916916e-06, "loss": 0.2249, "step": 6770 }, { "epoch": 0.5405124930150874, "grad_norm": 0.3700796256511293, "learning_rate": 9.17488460758585e-06, "loss": 0.1936, "step": 6771 }, { "epoch": 0.5405923205875309, "grad_norm": 0.3089666577509206, "learning_rate": 9.172307991158235e-06, "loss": 0.136, "step": 6772 }, { "epoch": 0.5406721481599744, "grad_norm": 0.2868524562233972, "learning_rate": 9.169731430058557e-06, "loss": 0.1771, "step": 6773 }, { "epoch": 0.540751975732418, "grad_norm": 0.2596936807505408, "learning_rate": 9.167154924459047e-06, "loss": 0.1717, "step": 6774 }, { "epoch": 0.5408318033048615, "grad_norm": 0.341894743809726, "learning_rate": 9.164578474531937e-06, "loss": 0.2094, "step": 6775 }, { "epoch": 0.540911630877305, "grad_norm": 0.31706148813102225, "learning_rate": 9.16200208044945e-06, "loss": 0.2077, "step": 6776 }, { "epoch": 0.5409914584497485, "grad_norm": 0.3084918447280244, "learning_rate": 9.159425742383801e-06, "loss": 0.198, "step": 6777 }, { "epoch": 0.541071286022192, "grad_norm": 0.33926332803127507, "learning_rate": 9.156849460507222e-06, "loss": 0.146, "step": 6778 }, { "epoch": 0.5411511135946356, "grad_norm": 0.2544064090150014, "learning_rate": 9.154273234991914e-06, "loss": 0.1293, "step": 6779 }, { "epoch": 0.5412309411670791, "grad_norm": 0.3255067348250134, "learning_rate": 9.151697066010098e-06, "loss": 0.1622, "step": 6780 }, { "epoch": 0.5413107687395227, "grad_norm": 0.3169021255589966, "learning_rate": 9.149120953733972e-06, "loss": 0.2253, "step": 6781 }, { "epoch": 0.5413905963119662, "grad_norm": 0.28425870556958055, "learning_rate": 9.146544898335747e-06, "loss": 0.2282, "step": 6782 }, { "epoch": 0.5414704238844097, "grad_norm": 0.3330447916415913, "learning_rate": 9.143968899987612e-06, "loss": 0.1294, "step": 6783 }, { "epoch": 0.5415502514568532, "grad_norm": 0.295479112020025, "learning_rate": 9.141392958861776e-06, "loss": 0.1857, "step": 6784 }, { "epoch": 0.5416300790292967, "grad_norm": 0.24846488025826627, "learning_rate": 9.138817075130421e-06, "loss": 0.1456, "step": 6785 }, { "epoch": 0.5417099066017402, "grad_norm": 0.29989594380657536, "learning_rate": 9.136241248965734e-06, "loss": 0.1703, "step": 6786 }, { "epoch": 0.5417897341741837, "grad_norm": 0.26738545594508334, "learning_rate": 9.133665480539903e-06, "loss": 0.2038, "step": 6787 }, { "epoch": 0.5418695617466273, "grad_norm": 0.3147265291638295, "learning_rate": 9.131089770025104e-06, "loss": 0.1875, "step": 6788 }, { "epoch": 0.5419493893190708, "grad_norm": 0.2662443550398867, "learning_rate": 9.128514117593517e-06, "loss": 0.1972, "step": 6789 }, { "epoch": 0.5420292168915143, "grad_norm": 0.27140744226925445, "learning_rate": 9.125938523417309e-06, "loss": 0.2036, "step": 6790 }, { "epoch": 0.5421090444639578, "grad_norm": 0.27461451421284827, "learning_rate": 9.123362987668657e-06, "loss": 0.1483, "step": 6791 }, { "epoch": 0.5421888720364014, "grad_norm": 0.29101202493586115, "learning_rate": 9.120787510519719e-06, "loss": 0.2024, "step": 6792 }, { "epoch": 0.5422686996088449, "grad_norm": 0.26925619404159146, "learning_rate": 9.118212092142653e-06, "loss": 0.1532, "step": 6793 }, { "epoch": 0.5423485271812885, "grad_norm": 0.28552550515854375, "learning_rate": 9.115636732709622e-06, "loss": 0.1849, "step": 6794 }, { "epoch": 0.542428354753732, "grad_norm": 0.31772371981071595, "learning_rate": 9.113061432392773e-06, "loss": 0.1422, "step": 6795 }, { "epoch": 0.5425081823261755, "grad_norm": 0.3403808727000471, "learning_rate": 9.11048619136426e-06, "loss": 0.1764, "step": 6796 }, { "epoch": 0.542588009898619, "grad_norm": 0.3126817933600204, "learning_rate": 9.107911009796224e-06, "loss": 0.1416, "step": 6797 }, { "epoch": 0.5426678374710625, "grad_norm": 0.2445159275569157, "learning_rate": 9.105335887860806e-06, "loss": 0.1335, "step": 6798 }, { "epoch": 0.542747665043506, "grad_norm": 0.2765976894374911, "learning_rate": 9.102760825730144e-06, "loss": 0.1491, "step": 6799 }, { "epoch": 0.5428274926159495, "grad_norm": 0.26936656977770645, "learning_rate": 9.100185823576369e-06, "loss": 0.1728, "step": 6800 }, { "epoch": 0.542907320188393, "grad_norm": 0.3207720040380181, "learning_rate": 9.097610881571613e-06, "loss": 0.1829, "step": 6801 }, { "epoch": 0.5429871477608366, "grad_norm": 0.2641283027206893, "learning_rate": 9.095035999887995e-06, "loss": 0.207, "step": 6802 }, { "epoch": 0.5430669753332801, "grad_norm": 0.2685197265464751, "learning_rate": 9.092461178697643e-06, "loss": 0.1871, "step": 6803 }, { "epoch": 0.5431468029057236, "grad_norm": 0.37500366739078744, "learning_rate": 9.089886418172667e-06, "loss": 0.1396, "step": 6804 }, { "epoch": 0.5432266304781671, "grad_norm": 0.25462721421902784, "learning_rate": 9.087311718485183e-06, "loss": 0.1437, "step": 6805 }, { "epoch": 0.5433064580506107, "grad_norm": 0.33294077939960826, "learning_rate": 9.0847370798073e-06, "loss": 0.1603, "step": 6806 }, { "epoch": 0.5433862856230542, "grad_norm": 0.3909288410945548, "learning_rate": 9.08216250231112e-06, "loss": 0.1877, "step": 6807 }, { "epoch": 0.5434661131954978, "grad_norm": 0.3095568508346071, "learning_rate": 9.079587986168744e-06, "loss": 0.1871, "step": 6808 }, { "epoch": 0.5435459407679413, "grad_norm": 0.3107701765080417, "learning_rate": 9.077013531552269e-06, "loss": 0.177, "step": 6809 }, { "epoch": 0.5436257683403848, "grad_norm": 0.3124583683246559, "learning_rate": 9.074439138633787e-06, "loss": 0.1747, "step": 6810 }, { "epoch": 0.5437055959128283, "grad_norm": 0.29645234303434925, "learning_rate": 9.071864807585385e-06, "loss": 0.195, "step": 6811 }, { "epoch": 0.5437854234852718, "grad_norm": 0.35074004707339107, "learning_rate": 9.069290538579148e-06, "loss": 0.1814, "step": 6812 }, { "epoch": 0.5438652510577153, "grad_norm": 0.31262457839647023, "learning_rate": 9.066716331787159e-06, "loss": 0.165, "step": 6813 }, { "epoch": 0.5439450786301588, "grad_norm": 0.32414589672496363, "learning_rate": 9.064142187381482e-06, "loss": 0.1611, "step": 6814 }, { "epoch": 0.5440249062026024, "grad_norm": 0.32356690888038153, "learning_rate": 9.061568105534204e-06, "loss": 0.1449, "step": 6815 }, { "epoch": 0.5441047337750459, "grad_norm": 0.27779968553590484, "learning_rate": 9.058994086417377e-06, "loss": 0.1574, "step": 6816 }, { "epoch": 0.5441845613474894, "grad_norm": 0.3079007869200574, "learning_rate": 9.056420130203075e-06, "loss": 0.2126, "step": 6817 }, { "epoch": 0.5442643889199329, "grad_norm": 0.3184664881980274, "learning_rate": 9.053846237063353e-06, "loss": 0.1425, "step": 6818 }, { "epoch": 0.5443442164923765, "grad_norm": 0.3538688601672658, "learning_rate": 9.051272407170267e-06, "loss": 0.1656, "step": 6819 }, { "epoch": 0.54442404406482, "grad_norm": 0.3094724803206444, "learning_rate": 9.048698640695865e-06, "loss": 0.2089, "step": 6820 }, { "epoch": 0.5445038716372635, "grad_norm": 0.33992165899882887, "learning_rate": 9.046124937812194e-06, "loss": 0.2051, "step": 6821 }, { "epoch": 0.5445836992097071, "grad_norm": 0.29359504240163314, "learning_rate": 9.0435512986913e-06, "loss": 0.1551, "step": 6822 }, { "epoch": 0.5446635267821506, "grad_norm": 0.292799132161977, "learning_rate": 9.040977723505209e-06, "loss": 0.1649, "step": 6823 }, { "epoch": 0.5447433543545941, "grad_norm": 0.2888470221867955, "learning_rate": 9.038404212425969e-06, "loss": 0.1723, "step": 6824 }, { "epoch": 0.5448231819270376, "grad_norm": 0.31192290979031745, "learning_rate": 9.035830765625594e-06, "loss": 0.2084, "step": 6825 }, { "epoch": 0.5449030094994811, "grad_norm": 0.34195142466813017, "learning_rate": 9.033257383276126e-06, "loss": 0.1266, "step": 6826 }, { "epoch": 0.5449828370719246, "grad_norm": 0.3136858993453291, "learning_rate": 9.030684065549568e-06, "loss": 0.163, "step": 6827 }, { "epoch": 0.5450626646443681, "grad_norm": 0.27885017319176997, "learning_rate": 9.028110812617951e-06, "loss": 0.1583, "step": 6828 }, { "epoch": 0.5451424922168117, "grad_norm": 0.39387652639332205, "learning_rate": 9.025537624653278e-06, "loss": 0.1644, "step": 6829 }, { "epoch": 0.5452223197892552, "grad_norm": 0.30265713722982107, "learning_rate": 9.022964501827556e-06, "loss": 0.1585, "step": 6830 }, { "epoch": 0.5453021473616987, "grad_norm": 0.34870626347292677, "learning_rate": 9.020391444312792e-06, "loss": 0.1438, "step": 6831 }, { "epoch": 0.5453819749341422, "grad_norm": 0.29535985983962215, "learning_rate": 9.017818452280984e-06, "loss": 0.1738, "step": 6832 }, { "epoch": 0.5454618025065858, "grad_norm": 0.30602273139968506, "learning_rate": 9.015245525904123e-06, "loss": 0.1682, "step": 6833 }, { "epoch": 0.5455416300790293, "grad_norm": 0.2521601750308265, "learning_rate": 9.012672665354203e-06, "loss": 0.1416, "step": 6834 }, { "epoch": 0.5456214576514729, "grad_norm": 0.2978914671159494, "learning_rate": 9.010099870803208e-06, "loss": 0.1624, "step": 6835 }, { "epoch": 0.5457012852239164, "grad_norm": 0.3109219467614431, "learning_rate": 9.007527142423122e-06, "loss": 0.1905, "step": 6836 }, { "epoch": 0.5457811127963599, "grad_norm": 0.3607516949538311, "learning_rate": 9.004954480385915e-06, "loss": 0.1857, "step": 6837 }, { "epoch": 0.5458609403688034, "grad_norm": 0.2725930690808903, "learning_rate": 9.002381884863565e-06, "loss": 0.1836, "step": 6838 }, { "epoch": 0.5459407679412469, "grad_norm": 0.27594592471114715, "learning_rate": 8.999809356028035e-06, "loss": 0.2211, "step": 6839 }, { "epoch": 0.5460205955136904, "grad_norm": 0.26631917682171935, "learning_rate": 8.997236894051295e-06, "loss": 0.1486, "step": 6840 }, { "epoch": 0.5461004230861339, "grad_norm": 0.287723800756548, "learning_rate": 8.994664499105298e-06, "loss": 0.1823, "step": 6841 }, { "epoch": 0.5461802506585774, "grad_norm": 0.32787735174140553, "learning_rate": 8.992092171362002e-06, "loss": 0.2151, "step": 6842 }, { "epoch": 0.546260078231021, "grad_norm": 0.2771223830248688, "learning_rate": 8.989519910993357e-06, "loss": 0.1716, "step": 6843 }, { "epoch": 0.5463399058034645, "grad_norm": 0.2935814869391847, "learning_rate": 8.986947718171303e-06, "loss": 0.1887, "step": 6844 }, { "epoch": 0.546419733375908, "grad_norm": 0.2633490753467271, "learning_rate": 8.984375593067789e-06, "loss": 0.1755, "step": 6845 }, { "epoch": 0.5464995609483516, "grad_norm": 0.28016899543124135, "learning_rate": 8.981803535854744e-06, "loss": 0.2155, "step": 6846 }, { "epoch": 0.5465793885207951, "grad_norm": 0.3014598638775141, "learning_rate": 8.979231546704107e-06, "loss": 0.16, "step": 6847 }, { "epoch": 0.5466592160932386, "grad_norm": 0.3483067786670061, "learning_rate": 8.976659625787799e-06, "loss": 0.1744, "step": 6848 }, { "epoch": 0.5467390436656822, "grad_norm": 0.3143645653905025, "learning_rate": 8.974087773277745e-06, "loss": 0.133, "step": 6849 }, { "epoch": 0.5468188712381257, "grad_norm": 0.3218456784026143, "learning_rate": 8.971515989345863e-06, "loss": 0.1311, "step": 6850 }, { "epoch": 0.5468986988105692, "grad_norm": 0.39183927307344607, "learning_rate": 8.968944274164069e-06, "loss": 0.172, "step": 6851 }, { "epoch": 0.5469785263830127, "grad_norm": 0.3109082176479598, "learning_rate": 8.96637262790427e-06, "loss": 0.1572, "step": 6852 }, { "epoch": 0.5470583539554562, "grad_norm": 0.31006497003820677, "learning_rate": 8.963801050738368e-06, "loss": 0.107, "step": 6853 }, { "epoch": 0.5471381815278997, "grad_norm": 0.2750998168960026, "learning_rate": 8.961229542838267e-06, "loss": 0.1452, "step": 6854 }, { "epoch": 0.5472180091003432, "grad_norm": 0.3004069883258553, "learning_rate": 8.958658104375858e-06, "loss": 0.1764, "step": 6855 }, { "epoch": 0.5472978366727868, "grad_norm": 0.28285313910047216, "learning_rate": 8.956086735523036e-06, "loss": 0.2132, "step": 6856 }, { "epoch": 0.5473776642452303, "grad_norm": 0.31254486750468086, "learning_rate": 8.953515436451682e-06, "loss": 0.1719, "step": 6857 }, { "epoch": 0.5474574918176738, "grad_norm": 0.3284020129966511, "learning_rate": 8.950944207333682e-06, "loss": 0.1687, "step": 6858 }, { "epoch": 0.5475373193901173, "grad_norm": 0.3350546410207119, "learning_rate": 8.948373048340914e-06, "loss": 0.1749, "step": 6859 }, { "epoch": 0.5476171469625609, "grad_norm": 0.2637935873733725, "learning_rate": 8.945801959645238e-06, "loss": 0.1755, "step": 6860 }, { "epoch": 0.5476969745350044, "grad_norm": 0.28659914343834036, "learning_rate": 8.943230941418536e-06, "loss": 0.1656, "step": 6861 }, { "epoch": 0.547776802107448, "grad_norm": 0.38716656891281775, "learning_rate": 8.940659993832656e-06, "loss": 0.1566, "step": 6862 }, { "epoch": 0.5478566296798915, "grad_norm": 0.3220687963455014, "learning_rate": 8.93808911705947e-06, "loss": 0.1972, "step": 6863 }, { "epoch": 0.547936457252335, "grad_norm": 0.2804307494343816, "learning_rate": 8.935518311270819e-06, "loss": 0.1831, "step": 6864 }, { "epoch": 0.5480162848247785, "grad_norm": 0.30698052511894364, "learning_rate": 8.93294757663856e-06, "loss": 0.1541, "step": 6865 }, { "epoch": 0.548096112397222, "grad_norm": 0.2809971092881812, "learning_rate": 8.930376913334534e-06, "loss": 0.1467, "step": 6866 }, { "epoch": 0.5481759399696655, "grad_norm": 0.31485722094424246, "learning_rate": 8.927806321530574e-06, "loss": 0.1872, "step": 6867 }, { "epoch": 0.548255767542109, "grad_norm": 0.2673077211944911, "learning_rate": 8.925235801398522e-06, "loss": 0.1314, "step": 6868 }, { "epoch": 0.5483355951145525, "grad_norm": 0.2871014049087972, "learning_rate": 8.922665353110198e-06, "loss": 0.166, "step": 6869 }, { "epoch": 0.5484154226869961, "grad_norm": 0.3258392777537907, "learning_rate": 8.92009497683744e-06, "loss": 0.18, "step": 6870 }, { "epoch": 0.5484952502594396, "grad_norm": 0.37044579302349706, "learning_rate": 8.917524672752049e-06, "loss": 0.2258, "step": 6871 }, { "epoch": 0.5485750778318831, "grad_norm": 0.2815063410065213, "learning_rate": 8.914954441025859e-06, "loss": 0.166, "step": 6872 }, { "epoch": 0.5486549054043267, "grad_norm": 0.2973191744561658, "learning_rate": 8.912384281830668e-06, "loss": 0.1285, "step": 6873 }, { "epoch": 0.5487347329767702, "grad_norm": 0.2551245607199898, "learning_rate": 8.90981419533828e-06, "loss": 0.1376, "step": 6874 }, { "epoch": 0.5488145605492137, "grad_norm": 0.3219224013347984, "learning_rate": 8.907244181720502e-06, "loss": 0.124, "step": 6875 }, { "epoch": 0.5488943881216573, "grad_norm": 0.3198605697167531, "learning_rate": 8.904674241149125e-06, "loss": 0.1551, "step": 6876 }, { "epoch": 0.5489742156941008, "grad_norm": 0.2805352415411388, "learning_rate": 8.902104373795941e-06, "loss": 0.1467, "step": 6877 }, { "epoch": 0.5490540432665443, "grad_norm": 0.22742309449482334, "learning_rate": 8.899534579832733e-06, "loss": 0.1854, "step": 6878 }, { "epoch": 0.5491338708389878, "grad_norm": 0.31482126808261346, "learning_rate": 8.896964859431285e-06, "loss": 0.2066, "step": 6879 }, { "epoch": 0.5492136984114313, "grad_norm": 0.2564018006791276, "learning_rate": 8.894395212763372e-06, "loss": 0.1178, "step": 6880 }, { "epoch": 0.5492935259838748, "grad_norm": 0.2943876903537851, "learning_rate": 8.891825640000761e-06, "loss": 0.1949, "step": 6881 }, { "epoch": 0.5493733535563183, "grad_norm": 0.3092046672567395, "learning_rate": 8.889256141315222e-06, "loss": 0.1636, "step": 6882 }, { "epoch": 0.5494531811287618, "grad_norm": 0.29494224600996694, "learning_rate": 8.886686716878513e-06, "loss": 0.165, "step": 6883 }, { "epoch": 0.5495330087012054, "grad_norm": 0.29354449130267307, "learning_rate": 8.884117366862392e-06, "loss": 0.168, "step": 6884 }, { "epoch": 0.5496128362736489, "grad_norm": 0.320975964261595, "learning_rate": 8.881548091438607e-06, "loss": 0.1892, "step": 6885 }, { "epoch": 0.5496926638460924, "grad_norm": 0.2700363376962084, "learning_rate": 8.87897889077891e-06, "loss": 0.148, "step": 6886 }, { "epoch": 0.549772491418536, "grad_norm": 0.28605711946851653, "learning_rate": 8.876409765055033e-06, "loss": 0.1403, "step": 6887 }, { "epoch": 0.5498523189909795, "grad_norm": 0.283821559801327, "learning_rate": 8.873840714438722e-06, "loss": 0.1361, "step": 6888 }, { "epoch": 0.549932146563423, "grad_norm": 0.34142682000445634, "learning_rate": 8.871271739101702e-06, "loss": 0.2224, "step": 6889 }, { "epoch": 0.5500119741358666, "grad_norm": 0.24749623142574126, "learning_rate": 8.868702839215695e-06, "loss": 0.1364, "step": 6890 }, { "epoch": 0.5500918017083101, "grad_norm": 0.3107824714574342, "learning_rate": 8.866134014952428e-06, "loss": 0.1755, "step": 6891 }, { "epoch": 0.5501716292807536, "grad_norm": 0.2769337468221579, "learning_rate": 8.863565266483616e-06, "loss": 0.1612, "step": 6892 }, { "epoch": 0.5502514568531971, "grad_norm": 0.3502887316443406, "learning_rate": 8.860996593980968e-06, "loss": 0.2013, "step": 6893 }, { "epoch": 0.5503312844256406, "grad_norm": 0.3472544662553231, "learning_rate": 8.85842799761619e-06, "loss": 0.2011, "step": 6894 }, { "epoch": 0.5504111119980841, "grad_norm": 0.29729980629740266, "learning_rate": 8.855859477560983e-06, "loss": 0.1857, "step": 6895 }, { "epoch": 0.5504909395705276, "grad_norm": 0.30346219787471224, "learning_rate": 8.853291033987042e-06, "loss": 0.1601, "step": 6896 }, { "epoch": 0.5505707671429712, "grad_norm": 0.3146566778767765, "learning_rate": 8.850722667066056e-06, "loss": 0.1695, "step": 6897 }, { "epoch": 0.5506505947154147, "grad_norm": 0.2646023895552621, "learning_rate": 8.848154376969715e-06, "loss": 0.1524, "step": 6898 }, { "epoch": 0.5507304222878582, "grad_norm": 0.2789964528682491, "learning_rate": 8.845586163869691e-06, "loss": 0.1968, "step": 6899 }, { "epoch": 0.5508102498603017, "grad_norm": 0.33689993287405046, "learning_rate": 8.843018027937668e-06, "loss": 0.2271, "step": 6900 }, { "epoch": 0.5508900774327453, "grad_norm": 0.2651761359341008, "learning_rate": 8.840449969345308e-06, "loss": 0.1514, "step": 6901 }, { "epoch": 0.5509699050051888, "grad_norm": 0.27193092749960585, "learning_rate": 8.83788198826428e-06, "loss": 0.182, "step": 6902 }, { "epoch": 0.5510497325776323, "grad_norm": 0.30804688040653, "learning_rate": 8.835314084866245e-06, "loss": 0.1842, "step": 6903 }, { "epoch": 0.5511295601500759, "grad_norm": 0.26087913535945484, "learning_rate": 8.832746259322847e-06, "loss": 0.1936, "step": 6904 }, { "epoch": 0.5512093877225194, "grad_norm": 0.27959743066399345, "learning_rate": 8.830178511805749e-06, "loss": 0.2004, "step": 6905 }, { "epoch": 0.5512892152949629, "grad_norm": 0.27913532422664994, "learning_rate": 8.827610842486583e-06, "loss": 0.1354, "step": 6906 }, { "epoch": 0.5513690428674064, "grad_norm": 0.3039964309129671, "learning_rate": 8.825043251536998e-06, "loss": 0.2592, "step": 6907 }, { "epoch": 0.5514488704398499, "grad_norm": 0.32153925434557495, "learning_rate": 8.822475739128616e-06, "loss": 0.1862, "step": 6908 }, { "epoch": 0.5515286980122934, "grad_norm": 0.2802977563291881, "learning_rate": 8.819908305433074e-06, "loss": 0.1925, "step": 6909 }, { "epoch": 0.5516085255847369, "grad_norm": 0.305922359900097, "learning_rate": 8.817340950621994e-06, "loss": 0.1851, "step": 6910 }, { "epoch": 0.5516883531571805, "grad_norm": 0.2777066363745238, "learning_rate": 8.814773674866984e-06, "loss": 0.1772, "step": 6911 }, { "epoch": 0.551768180729624, "grad_norm": 0.2942125694874683, "learning_rate": 8.812206478339671e-06, "loss": 0.1575, "step": 6912 }, { "epoch": 0.5518480083020675, "grad_norm": 0.2667946424459094, "learning_rate": 8.809639361211648e-06, "loss": 0.1747, "step": 6913 }, { "epoch": 0.5519278358745111, "grad_norm": 0.27270161910924723, "learning_rate": 8.807072323654529e-06, "loss": 0.1196, "step": 6914 }, { "epoch": 0.5520076634469546, "grad_norm": 0.2847194929769998, "learning_rate": 8.804505365839898e-06, "loss": 0.1383, "step": 6915 }, { "epoch": 0.5520874910193981, "grad_norm": 0.26677135658855694, "learning_rate": 8.801938487939359e-06, "loss": 0.1912, "step": 6916 }, { "epoch": 0.5521673185918416, "grad_norm": 0.26875422602861454, "learning_rate": 8.799371690124485e-06, "loss": 0.2057, "step": 6917 }, { "epoch": 0.5522471461642852, "grad_norm": 0.3090256493878061, "learning_rate": 8.796804972566867e-06, "loss": 0.174, "step": 6918 }, { "epoch": 0.5523269737367287, "grad_norm": 0.2956410539367024, "learning_rate": 8.794238335438073e-06, "loss": 0.1672, "step": 6919 }, { "epoch": 0.5524068013091722, "grad_norm": 0.3318700470050391, "learning_rate": 8.791671778909674e-06, "loss": 0.227, "step": 6920 }, { "epoch": 0.5524866288816157, "grad_norm": 0.2735031796505902, "learning_rate": 8.789105303153235e-06, "loss": 0.1486, "step": 6921 }, { "epoch": 0.5525664564540592, "grad_norm": 0.2813598857142178, "learning_rate": 8.786538908340313e-06, "loss": 0.1368, "step": 6922 }, { "epoch": 0.5526462840265027, "grad_norm": 0.2830820384858069, "learning_rate": 8.783972594642465e-06, "loss": 0.191, "step": 6923 }, { "epoch": 0.5527261115989462, "grad_norm": 0.2565972682794761, "learning_rate": 8.781406362231234e-06, "loss": 0.1638, "step": 6924 }, { "epoch": 0.5528059391713898, "grad_norm": 0.27453736583221544, "learning_rate": 8.778840211278168e-06, "loss": 0.1352, "step": 6925 }, { "epoch": 0.5528857667438333, "grad_norm": 0.2813157369488264, "learning_rate": 8.776274141954802e-06, "loss": 0.1274, "step": 6926 }, { "epoch": 0.5529655943162768, "grad_norm": 0.3302777211757112, "learning_rate": 8.773708154432664e-06, "loss": 0.1996, "step": 6927 }, { "epoch": 0.5530454218887204, "grad_norm": 0.3079216326351255, "learning_rate": 8.771142248883284e-06, "loss": 0.1674, "step": 6928 }, { "epoch": 0.5531252494611639, "grad_norm": 0.2918251595753202, "learning_rate": 8.76857642547818e-06, "loss": 0.1628, "step": 6929 }, { "epoch": 0.5532050770336074, "grad_norm": 0.27606682821165246, "learning_rate": 8.766010684388872e-06, "loss": 0.1677, "step": 6930 }, { "epoch": 0.553284904606051, "grad_norm": 0.2621559028011505, "learning_rate": 8.763445025786862e-06, "loss": 0.2184, "step": 6931 }, { "epoch": 0.5533647321784945, "grad_norm": 0.2643617785402462, "learning_rate": 8.760879449843662e-06, "loss": 0.2148, "step": 6932 }, { "epoch": 0.553444559750938, "grad_norm": 0.29418152931670655, "learning_rate": 8.758313956730767e-06, "loss": 0.1837, "step": 6933 }, { "epoch": 0.5535243873233815, "grad_norm": 0.29673681196667473, "learning_rate": 8.75574854661967e-06, "loss": 0.2157, "step": 6934 }, { "epoch": 0.553604214895825, "grad_norm": 0.3075260087502399, "learning_rate": 8.753183219681857e-06, "loss": 0.1762, "step": 6935 }, { "epoch": 0.5536840424682685, "grad_norm": 0.27412393431979043, "learning_rate": 8.75061797608881e-06, "loss": 0.185, "step": 6936 }, { "epoch": 0.553763870040712, "grad_norm": 0.3607645514025343, "learning_rate": 8.748052816012011e-06, "loss": 0.1931, "step": 6937 }, { "epoch": 0.5538436976131556, "grad_norm": 0.2864115351189518, "learning_rate": 8.745487739622924e-06, "loss": 0.1824, "step": 6938 }, { "epoch": 0.5539235251855991, "grad_norm": 0.2946057158890387, "learning_rate": 8.74292274709302e-06, "loss": 0.1431, "step": 6939 }, { "epoch": 0.5540033527580426, "grad_norm": 0.32684714540122417, "learning_rate": 8.740357838593754e-06, "loss": 0.1819, "step": 6940 }, { "epoch": 0.5540831803304862, "grad_norm": 0.30481336128384656, "learning_rate": 8.73779301429658e-06, "loss": 0.1686, "step": 6941 }, { "epoch": 0.5541630079029297, "grad_norm": 0.2706371840015195, "learning_rate": 8.735228274372949e-06, "loss": 0.1618, "step": 6942 }, { "epoch": 0.5542428354753732, "grad_norm": 0.26441202600893604, "learning_rate": 8.732663618994303e-06, "loss": 0.1634, "step": 6943 }, { "epoch": 0.5543226630478167, "grad_norm": 0.30301855929680377, "learning_rate": 8.73009904833208e-06, "loss": 0.1784, "step": 6944 }, { "epoch": 0.5544024906202603, "grad_norm": 0.3005483942332645, "learning_rate": 8.727534562557706e-06, "loss": 0.1918, "step": 6945 }, { "epoch": 0.5544823181927038, "grad_norm": 0.29863443465549905, "learning_rate": 8.724970161842614e-06, "loss": 0.1452, "step": 6946 }, { "epoch": 0.5545621457651473, "grad_norm": 0.27882561043843923, "learning_rate": 8.722405846358225e-06, "loss": 0.1708, "step": 6947 }, { "epoch": 0.5546419733375908, "grad_norm": 0.2585582313384055, "learning_rate": 8.719841616275939e-06, "loss": 0.1948, "step": 6948 }, { "epoch": 0.5547218009100343, "grad_norm": 0.32589177566060945, "learning_rate": 8.717277471767184e-06, "loss": 0.1454, "step": 6949 }, { "epoch": 0.5548016284824778, "grad_norm": 0.3028832650323331, "learning_rate": 8.714713413003347e-06, "loss": 0.1936, "step": 6950 }, { "epoch": 0.5548814560549213, "grad_norm": 0.2875574204985534, "learning_rate": 8.712149440155838e-06, "loss": 0.1605, "step": 6951 }, { "epoch": 0.5549612836273649, "grad_norm": 0.3228556828357102, "learning_rate": 8.709585553396035e-06, "loss": 0.1769, "step": 6952 }, { "epoch": 0.5550411111998084, "grad_norm": 0.2979927967923248, "learning_rate": 8.707021752895337e-06, "loss": 0.1663, "step": 6953 }, { "epoch": 0.5551209387722519, "grad_norm": 0.2714538132436697, "learning_rate": 8.704458038825115e-06, "loss": 0.1596, "step": 6954 }, { "epoch": 0.5552007663446955, "grad_norm": 0.27343547387493766, "learning_rate": 8.701894411356747e-06, "loss": 0.1515, "step": 6955 }, { "epoch": 0.555280593917139, "grad_norm": 0.2595960140437326, "learning_rate": 8.699330870661604e-06, "loss": 0.1322, "step": 6956 }, { "epoch": 0.5553604214895825, "grad_norm": 0.3092667986237396, "learning_rate": 8.696767416911039e-06, "loss": 0.1556, "step": 6957 }, { "epoch": 0.555440249062026, "grad_norm": 0.36220006929019605, "learning_rate": 8.69420405027642e-06, "loss": 0.1906, "step": 6958 }, { "epoch": 0.5555200766344696, "grad_norm": 0.28271225108003245, "learning_rate": 8.691640770929086e-06, "loss": 0.1733, "step": 6959 }, { "epoch": 0.5555999042069131, "grad_norm": 0.2836824225382431, "learning_rate": 8.689077579040396e-06, "loss": 0.1689, "step": 6960 }, { "epoch": 0.5556797317793566, "grad_norm": 0.3288182822233375, "learning_rate": 8.686514474781676e-06, "loss": 0.1832, "step": 6961 }, { "epoch": 0.5557595593518001, "grad_norm": 0.29799531400048007, "learning_rate": 8.68395145832427e-06, "loss": 0.1629, "step": 6962 }, { "epoch": 0.5558393869242436, "grad_norm": 0.3123950524719914, "learning_rate": 8.681388529839502e-06, "loss": 0.1531, "step": 6963 }, { "epoch": 0.5559192144966871, "grad_norm": 0.2966109996011532, "learning_rate": 8.678825689498687e-06, "loss": 0.1728, "step": 6964 }, { "epoch": 0.5559990420691306, "grad_norm": 0.2860633440256059, "learning_rate": 8.67626293747315e-06, "loss": 0.1855, "step": 6965 }, { "epoch": 0.5560788696415742, "grad_norm": 0.32504092566702786, "learning_rate": 8.673700273934195e-06, "loss": 0.1827, "step": 6966 }, { "epoch": 0.5561586972140177, "grad_norm": 0.3071511480139295, "learning_rate": 8.671137699053129e-06, "loss": 0.1546, "step": 6967 }, { "epoch": 0.5562385247864613, "grad_norm": 0.3119092336475546, "learning_rate": 8.668575213001248e-06, "loss": 0.124, "step": 6968 }, { "epoch": 0.5563183523589048, "grad_norm": 0.27447195567524074, "learning_rate": 8.666012815949848e-06, "loss": 0.1399, "step": 6969 }, { "epoch": 0.5563981799313483, "grad_norm": 0.24865586658208264, "learning_rate": 8.663450508070209e-06, "loss": 0.1697, "step": 6970 }, { "epoch": 0.5564780075037918, "grad_norm": 0.3311938926653509, "learning_rate": 8.660888289533614e-06, "loss": 0.1613, "step": 6971 }, { "epoch": 0.5565578350762354, "grad_norm": 0.3411778046087404, "learning_rate": 8.658326160511338e-06, "loss": 0.1582, "step": 6972 }, { "epoch": 0.5566376626486789, "grad_norm": 0.2786693406370914, "learning_rate": 8.655764121174649e-06, "loss": 0.1789, "step": 6973 }, { "epoch": 0.5567174902211224, "grad_norm": 0.2333728656388361, "learning_rate": 8.65320217169481e-06, "loss": 0.166, "step": 6974 }, { "epoch": 0.5567973177935659, "grad_norm": 0.2456964107103093, "learning_rate": 8.650640312243074e-06, "loss": 0.1329, "step": 6975 }, { "epoch": 0.5568771453660094, "grad_norm": 0.2893118062102455, "learning_rate": 8.648078542990694e-06, "loss": 0.1748, "step": 6976 }, { "epoch": 0.5569569729384529, "grad_norm": 0.31399249971376475, "learning_rate": 8.645516864108914e-06, "loss": 0.1584, "step": 6977 }, { "epoch": 0.5570368005108964, "grad_norm": 0.2901999494321072, "learning_rate": 8.64295527576897e-06, "loss": 0.1611, "step": 6978 }, { "epoch": 0.55711662808334, "grad_norm": 0.31798683864532257, "learning_rate": 8.640393778142096e-06, "loss": 0.2147, "step": 6979 }, { "epoch": 0.5571964556557835, "grad_norm": 0.4081418274387848, "learning_rate": 8.637832371399517e-06, "loss": 0.1781, "step": 6980 }, { "epoch": 0.557276283228227, "grad_norm": 0.34637939530306194, "learning_rate": 8.635271055712455e-06, "loss": 0.1664, "step": 6981 }, { "epoch": 0.5573561108006706, "grad_norm": 0.3168593892015056, "learning_rate": 8.63270983125212e-06, "loss": 0.1772, "step": 6982 }, { "epoch": 0.5574359383731141, "grad_norm": 0.32301348270416663, "learning_rate": 8.630148698189723e-06, "loss": 0.1719, "step": 6983 }, { "epoch": 0.5575157659455576, "grad_norm": 0.3582979979839976, "learning_rate": 8.627587656696463e-06, "loss": 0.1687, "step": 6984 }, { "epoch": 0.5575955935180011, "grad_norm": 0.281465640779517, "learning_rate": 8.625026706943538e-06, "loss": 0.1411, "step": 6985 }, { "epoch": 0.5576754210904447, "grad_norm": 0.28597905818311864, "learning_rate": 8.622465849102137e-06, "loss": 0.2214, "step": 6986 }, { "epoch": 0.5577552486628882, "grad_norm": 0.29669200826285524, "learning_rate": 8.61990508334344e-06, "loss": 0.1898, "step": 6987 }, { "epoch": 0.5578350762353317, "grad_norm": 0.27371420350799675, "learning_rate": 8.617344409838631e-06, "loss": 0.1992, "step": 6988 }, { "epoch": 0.5579149038077752, "grad_norm": 0.25980685977744655, "learning_rate": 8.614783828758873e-06, "loss": 0.1773, "step": 6989 }, { "epoch": 0.5579947313802187, "grad_norm": 0.27084009598924286, "learning_rate": 8.612223340275336e-06, "loss": 0.1501, "step": 6990 }, { "epoch": 0.5580745589526622, "grad_norm": 0.27155185746174104, "learning_rate": 8.609662944559173e-06, "loss": 0.1585, "step": 6991 }, { "epoch": 0.5581543865251057, "grad_norm": 0.3277675602544329, "learning_rate": 8.607102641781544e-06, "loss": 0.2023, "step": 6992 }, { "epoch": 0.5582342140975493, "grad_norm": 0.3487605339955512, "learning_rate": 8.604542432113592e-06, "loss": 0.2139, "step": 6993 }, { "epoch": 0.5583140416699928, "grad_norm": 0.35398537275177644, "learning_rate": 8.601982315726451e-06, "loss": 0.1144, "step": 6994 }, { "epoch": 0.5583938692424364, "grad_norm": 0.30496040092068494, "learning_rate": 8.599422292791266e-06, "loss": 0.1301, "step": 6995 }, { "epoch": 0.5584736968148799, "grad_norm": 0.34180945361447834, "learning_rate": 8.596862363479151e-06, "loss": 0.1379, "step": 6996 }, { "epoch": 0.5585535243873234, "grad_norm": 0.3354052343744983, "learning_rate": 8.59430252796124e-06, "loss": 0.1248, "step": 6997 }, { "epoch": 0.5586333519597669, "grad_norm": 0.29596375683017795, "learning_rate": 8.591742786408637e-06, "loss": 0.1842, "step": 6998 }, { "epoch": 0.5587131795322104, "grad_norm": 0.27651138572114903, "learning_rate": 8.589183138992461e-06, "loss": 0.1554, "step": 6999 }, { "epoch": 0.558793007104654, "grad_norm": 0.29259453477051756, "learning_rate": 8.586623585883807e-06, "loss": 0.1898, "step": 7000 }, { "epoch": 0.5588728346770975, "grad_norm": 0.2978562543502448, "learning_rate": 8.584064127253769e-06, "loss": 0.1625, "step": 7001 }, { "epoch": 0.558952662249541, "grad_norm": 0.31710942413271903, "learning_rate": 8.581504763273449e-06, "loss": 0.202, "step": 7002 }, { "epoch": 0.5590324898219845, "grad_norm": 0.30158082295851346, "learning_rate": 8.578945494113913e-06, "loss": 0.1793, "step": 7003 }, { "epoch": 0.559112317394428, "grad_norm": 0.30153726311177054, "learning_rate": 8.576386319946253e-06, "loss": 0.128, "step": 7004 }, { "epoch": 0.5591921449668715, "grad_norm": 0.2720501615308906, "learning_rate": 8.573827240941528e-06, "loss": 0.1682, "step": 7005 }, { "epoch": 0.559271972539315, "grad_norm": 0.29139449675033025, "learning_rate": 8.571268257270816e-06, "loss": 0.1653, "step": 7006 }, { "epoch": 0.5593518001117586, "grad_norm": 0.27751837675668917, "learning_rate": 8.568709369105162e-06, "loss": 0.1551, "step": 7007 }, { "epoch": 0.5594316276842021, "grad_norm": 0.35818837709694445, "learning_rate": 8.566150576615623e-06, "loss": 0.1582, "step": 7008 }, { "epoch": 0.5595114552566457, "grad_norm": 0.2874505372013501, "learning_rate": 8.563591879973245e-06, "loss": 0.1688, "step": 7009 }, { "epoch": 0.5595912828290892, "grad_norm": 0.3220442325108485, "learning_rate": 8.56103327934906e-06, "loss": 0.1575, "step": 7010 }, { "epoch": 0.5596711104015327, "grad_norm": 0.30663824053270383, "learning_rate": 8.558474774914113e-06, "loss": 0.1603, "step": 7011 }, { "epoch": 0.5597509379739762, "grad_norm": 0.2653665626253829, "learning_rate": 8.555916366839418e-06, "loss": 0.2227, "step": 7012 }, { "epoch": 0.5598307655464198, "grad_norm": 0.25203624367200717, "learning_rate": 8.553358055296e-06, "loss": 0.1755, "step": 7013 }, { "epoch": 0.5599105931188633, "grad_norm": 0.27587328816603335, "learning_rate": 8.550799840454873e-06, "loss": 0.1747, "step": 7014 }, { "epoch": 0.5599904206913068, "grad_norm": 0.3259273473640523, "learning_rate": 8.548241722487038e-06, "loss": 0.1502, "step": 7015 }, { "epoch": 0.5600702482637503, "grad_norm": 0.3255043316314089, "learning_rate": 8.5456837015635e-06, "loss": 0.2117, "step": 7016 }, { "epoch": 0.5601500758361938, "grad_norm": 0.2898103441668161, "learning_rate": 8.543125777855253e-06, "loss": 0.1184, "step": 7017 }, { "epoch": 0.5602299034086373, "grad_norm": 0.3135570015719111, "learning_rate": 8.540567951533281e-06, "loss": 0.1584, "step": 7018 }, { "epoch": 0.5603097309810808, "grad_norm": 0.3510476471988137, "learning_rate": 8.538010222768565e-06, "loss": 0.1666, "step": 7019 }, { "epoch": 0.5603895585535243, "grad_norm": 0.33183083117751605, "learning_rate": 8.53545259173208e-06, "loss": 0.1705, "step": 7020 }, { "epoch": 0.5604693861259679, "grad_norm": 0.33651050131214927, "learning_rate": 8.532895058594794e-06, "loss": 0.1337, "step": 7021 }, { "epoch": 0.5605492136984115, "grad_norm": 0.30952696147157865, "learning_rate": 8.530337623527668e-06, "loss": 0.2021, "step": 7022 }, { "epoch": 0.560629041270855, "grad_norm": 0.33870331542653614, "learning_rate": 8.527780286701657e-06, "loss": 0.1793, "step": 7023 }, { "epoch": 0.5607088688432985, "grad_norm": 0.2874026305347619, "learning_rate": 8.525223048287704e-06, "loss": 0.205, "step": 7024 }, { "epoch": 0.560788696415742, "grad_norm": 0.3288576590902197, "learning_rate": 8.522665908456756e-06, "loss": 0.1523, "step": 7025 }, { "epoch": 0.5608685239881855, "grad_norm": 0.30023007823559034, "learning_rate": 8.520108867379744e-06, "loss": 0.182, "step": 7026 }, { "epoch": 0.560948351560629, "grad_norm": 0.2958312277921718, "learning_rate": 8.5175519252276e-06, "loss": 0.1595, "step": 7027 }, { "epoch": 0.5610281791330726, "grad_norm": 0.3156729111683735, "learning_rate": 8.51499508217124e-06, "loss": 0.1778, "step": 7028 }, { "epoch": 0.5611080067055161, "grad_norm": 0.30207961081443563, "learning_rate": 8.512438338381587e-06, "loss": 0.1463, "step": 7029 }, { "epoch": 0.5611878342779596, "grad_norm": 0.3053214950615924, "learning_rate": 8.509881694029543e-06, "loss": 0.1462, "step": 7030 }, { "epoch": 0.5612676618504031, "grad_norm": 0.30521070187132654, "learning_rate": 8.507325149286007e-06, "loss": 0.1728, "step": 7031 }, { "epoch": 0.5613474894228466, "grad_norm": 0.2789839995398081, "learning_rate": 8.504768704321881e-06, "loss": 0.1437, "step": 7032 }, { "epoch": 0.5614273169952901, "grad_norm": 0.31524553496069246, "learning_rate": 8.502212359308047e-06, "loss": 0.1726, "step": 7033 }, { "epoch": 0.5615071445677337, "grad_norm": 0.2763272735675374, "learning_rate": 8.499656114415392e-06, "loss": 0.1622, "step": 7034 }, { "epoch": 0.5615869721401772, "grad_norm": 0.27620602207315437, "learning_rate": 8.497099969814785e-06, "loss": 0.1954, "step": 7035 }, { "epoch": 0.5616667997126208, "grad_norm": 0.32218352100259856, "learning_rate": 8.494543925677101e-06, "loss": 0.1928, "step": 7036 }, { "epoch": 0.5617466272850643, "grad_norm": 0.3349506093241961, "learning_rate": 8.4919879821732e-06, "loss": 0.2039, "step": 7037 }, { "epoch": 0.5618264548575078, "grad_norm": 0.2685572562179536, "learning_rate": 8.489432139473927e-06, "loss": 0.183, "step": 7038 }, { "epoch": 0.5619062824299513, "grad_norm": 0.27581930748533473, "learning_rate": 8.486876397750144e-06, "loss": 0.2098, "step": 7039 }, { "epoch": 0.5619861100023948, "grad_norm": 0.31992017858709026, "learning_rate": 8.484320757172681e-06, "loss": 0.1921, "step": 7040 }, { "epoch": 0.5620659375748384, "grad_norm": 0.2901387554116516, "learning_rate": 8.481765217912382e-06, "loss": 0.1877, "step": 7041 }, { "epoch": 0.5621457651472819, "grad_norm": 0.30208406822707873, "learning_rate": 8.479209780140065e-06, "loss": 0.1549, "step": 7042 }, { "epoch": 0.5622255927197254, "grad_norm": 0.31867811181449934, "learning_rate": 8.47665444402656e-06, "loss": 0.1949, "step": 7043 }, { "epoch": 0.5623054202921689, "grad_norm": 0.3026817818404108, "learning_rate": 8.474099209742676e-06, "loss": 0.1661, "step": 7044 }, { "epoch": 0.5623852478646124, "grad_norm": 0.3151237518305181, "learning_rate": 8.471544077459219e-06, "loss": 0.1879, "step": 7045 }, { "epoch": 0.5624650754370559, "grad_norm": 0.270863273151773, "learning_rate": 8.468989047346993e-06, "loss": 0.1607, "step": 7046 }, { "epoch": 0.5625449030094994, "grad_norm": 0.3529605971309938, "learning_rate": 8.46643411957679e-06, "loss": 0.1898, "step": 7047 }, { "epoch": 0.562624730581943, "grad_norm": 0.31101583290683044, "learning_rate": 8.463879294319398e-06, "loss": 0.1446, "step": 7048 }, { "epoch": 0.5627045581543866, "grad_norm": 0.34984158811653043, "learning_rate": 8.461324571745592e-06, "loss": 0.1973, "step": 7049 }, { "epoch": 0.5627843857268301, "grad_norm": 0.3019680469675969, "learning_rate": 8.458769952026155e-06, "loss": 0.1492, "step": 7050 }, { "epoch": 0.5628642132992736, "grad_norm": 0.2894293105749601, "learning_rate": 8.456215435331843e-06, "loss": 0.1726, "step": 7051 }, { "epoch": 0.5629440408717171, "grad_norm": 0.25870495265175947, "learning_rate": 8.453661021833423e-06, "loss": 0.1577, "step": 7052 }, { "epoch": 0.5630238684441606, "grad_norm": 0.31041651170111184, "learning_rate": 8.451106711701645e-06, "loss": 0.16, "step": 7053 }, { "epoch": 0.5631036960166041, "grad_norm": 0.3147899574414897, "learning_rate": 8.44855250510725e-06, "loss": 0.1728, "step": 7054 }, { "epoch": 0.5631835235890477, "grad_norm": 0.3199378329258832, "learning_rate": 8.445998402220982e-06, "loss": 0.1864, "step": 7055 }, { "epoch": 0.5632633511614912, "grad_norm": 0.40063001414704175, "learning_rate": 8.443444403213571e-06, "loss": 0.1785, "step": 7056 }, { "epoch": 0.5633431787339347, "grad_norm": 0.3487179835362493, "learning_rate": 8.440890508255742e-06, "loss": 0.1626, "step": 7057 }, { "epoch": 0.5634230063063782, "grad_norm": 0.27898643401094775, "learning_rate": 8.438336717518212e-06, "loss": 0.1469, "step": 7058 }, { "epoch": 0.5635028338788217, "grad_norm": 0.27055020901143034, "learning_rate": 8.435783031171693e-06, "loss": 0.1841, "step": 7059 }, { "epoch": 0.5635826614512652, "grad_norm": 0.32137773785636226, "learning_rate": 8.433229449386888e-06, "loss": 0.2458, "step": 7060 }, { "epoch": 0.5636624890237087, "grad_norm": 0.32988596133201536, "learning_rate": 8.430675972334493e-06, "loss": 0.1836, "step": 7061 }, { "epoch": 0.5637423165961523, "grad_norm": 0.30300418736604307, "learning_rate": 8.428122600185198e-06, "loss": 0.2085, "step": 7062 }, { "epoch": 0.5638221441685959, "grad_norm": 0.31285490990951376, "learning_rate": 8.425569333109687e-06, "loss": 0.2114, "step": 7063 }, { "epoch": 0.5639019717410394, "grad_norm": 0.26006993230406455, "learning_rate": 8.423016171278637e-06, "loss": 0.15, "step": 7064 }, { "epoch": 0.5639817993134829, "grad_norm": 0.2918087360854651, "learning_rate": 8.420463114862713e-06, "loss": 0.1404, "step": 7065 }, { "epoch": 0.5640616268859264, "grad_norm": 0.3185452160625684, "learning_rate": 8.417910164032582e-06, "loss": 0.2105, "step": 7066 }, { "epoch": 0.5641414544583699, "grad_norm": 0.3452943946175268, "learning_rate": 8.415357318958894e-06, "loss": 0.1709, "step": 7067 }, { "epoch": 0.5642212820308135, "grad_norm": 0.32148079806090163, "learning_rate": 8.412804579812297e-06, "loss": 0.1626, "step": 7068 }, { "epoch": 0.564301109603257, "grad_norm": 0.3218153812360659, "learning_rate": 8.410251946763434e-06, "loss": 0.1413, "step": 7069 }, { "epoch": 0.5643809371757005, "grad_norm": 0.28628243390680524, "learning_rate": 8.407699419982937e-06, "loss": 0.163, "step": 7070 }, { "epoch": 0.564460764748144, "grad_norm": 0.30888082227949604, "learning_rate": 8.405146999641431e-06, "loss": 0.184, "step": 7071 }, { "epoch": 0.5645405923205875, "grad_norm": 0.26666348736613676, "learning_rate": 8.402594685909537e-06, "loss": 0.1755, "step": 7072 }, { "epoch": 0.564620419893031, "grad_norm": 0.2710012209381661, "learning_rate": 8.400042478957869e-06, "loss": 0.1791, "step": 7073 }, { "epoch": 0.5647002474654745, "grad_norm": 0.2938208614420277, "learning_rate": 8.397490378957031e-06, "loss": 0.1833, "step": 7074 }, { "epoch": 0.564780075037918, "grad_norm": 0.3200511730027847, "learning_rate": 8.394938386077611e-06, "loss": 0.1673, "step": 7075 }, { "epoch": 0.5648599026103616, "grad_norm": 0.3312473811135418, "learning_rate": 8.392386500490214e-06, "loss": 0.1764, "step": 7076 }, { "epoch": 0.5649397301828052, "grad_norm": 0.3177848726893807, "learning_rate": 8.389834722365417e-06, "loss": 0.1626, "step": 7077 }, { "epoch": 0.5650195577552487, "grad_norm": 0.3013377868006366, "learning_rate": 8.387283051873796e-06, "loss": 0.1625, "step": 7078 }, { "epoch": 0.5650993853276922, "grad_norm": 0.30925933848340137, "learning_rate": 8.38473148918592e-06, "loss": 0.1891, "step": 7079 }, { "epoch": 0.5651792129001357, "grad_norm": 0.30233072553312307, "learning_rate": 8.382180034472353e-06, "loss": 0.2102, "step": 7080 }, { "epoch": 0.5652590404725792, "grad_norm": 0.2735764103402404, "learning_rate": 8.379628687903648e-06, "loss": 0.1802, "step": 7081 }, { "epoch": 0.5653388680450228, "grad_norm": 0.291885741255893, "learning_rate": 8.377077449650352e-06, "loss": 0.1954, "step": 7082 }, { "epoch": 0.5654186956174663, "grad_norm": 0.3435264864277314, "learning_rate": 8.374526319883011e-06, "loss": 0.1923, "step": 7083 }, { "epoch": 0.5654985231899098, "grad_norm": 0.3377057523922656, "learning_rate": 8.371975298772143e-06, "loss": 0.1689, "step": 7084 }, { "epoch": 0.5655783507623533, "grad_norm": 0.2799693045279635, "learning_rate": 8.369424386488293e-06, "loss": 0.2262, "step": 7085 }, { "epoch": 0.5656581783347968, "grad_norm": 0.3128449298446735, "learning_rate": 8.366873583201961e-06, "loss": 0.1529, "step": 7086 }, { "epoch": 0.5657380059072403, "grad_norm": 0.32485320323090416, "learning_rate": 8.364322889083675e-06, "loss": 0.1858, "step": 7087 }, { "epoch": 0.5658178334796838, "grad_norm": 0.2790253287743837, "learning_rate": 8.361772304303925e-06, "loss": 0.1362, "step": 7088 }, { "epoch": 0.5658976610521274, "grad_norm": 0.3216469220783606, "learning_rate": 8.35922182903322e-06, "loss": 0.1532, "step": 7089 }, { "epoch": 0.565977488624571, "grad_norm": 0.2854480932442451, "learning_rate": 8.356671463442039e-06, "loss": 0.1402, "step": 7090 }, { "epoch": 0.5660573161970145, "grad_norm": 0.3425018648089285, "learning_rate": 8.354121207700865e-06, "loss": 0.1644, "step": 7091 }, { "epoch": 0.566137143769458, "grad_norm": 0.26716640404595265, "learning_rate": 8.351571061980178e-06, "loss": 0.1656, "step": 7092 }, { "epoch": 0.5662169713419015, "grad_norm": 0.35888605397340345, "learning_rate": 8.349021026450438e-06, "loss": 0.1956, "step": 7093 }, { "epoch": 0.566296798914345, "grad_norm": 0.30360684001764876, "learning_rate": 8.346471101282113e-06, "loss": 0.1495, "step": 7094 }, { "epoch": 0.5663766264867885, "grad_norm": 0.30106182580292457, "learning_rate": 8.343921286645645e-06, "loss": 0.171, "step": 7095 }, { "epoch": 0.5664564540592321, "grad_norm": 0.2862442593463089, "learning_rate": 8.341371582711493e-06, "loss": 0.1665, "step": 7096 }, { "epoch": 0.5665362816316756, "grad_norm": 0.29558996563587536, "learning_rate": 8.338821989650085e-06, "loss": 0.1418, "step": 7097 }, { "epoch": 0.5666161092041191, "grad_norm": 0.2900799821161709, "learning_rate": 8.33627250763185e-06, "loss": 0.1299, "step": 7098 }, { "epoch": 0.5666959367765626, "grad_norm": 0.3233411421437765, "learning_rate": 8.333723136827216e-06, "loss": 0.1695, "step": 7099 }, { "epoch": 0.5667757643490061, "grad_norm": 0.30777875475784006, "learning_rate": 8.331173877406594e-06, "loss": 0.1581, "step": 7100 }, { "epoch": 0.5668555919214496, "grad_norm": 0.3219925079223602, "learning_rate": 8.328624729540396e-06, "loss": 0.1425, "step": 7101 }, { "epoch": 0.5669354194938931, "grad_norm": 0.27528964025207975, "learning_rate": 8.326075693399019e-06, "loss": 0.1206, "step": 7102 }, { "epoch": 0.5670152470663367, "grad_norm": 0.30063124352929865, "learning_rate": 8.323526769152859e-06, "loss": 0.1924, "step": 7103 }, { "epoch": 0.5670950746387803, "grad_norm": 0.2964347197972413, "learning_rate": 8.3209779569723e-06, "loss": 0.1297, "step": 7104 }, { "epoch": 0.5671749022112238, "grad_norm": 0.3025340982246907, "learning_rate": 8.318429257027719e-06, "loss": 0.1567, "step": 7105 }, { "epoch": 0.5672547297836673, "grad_norm": 0.2761997374195297, "learning_rate": 8.315880669489488e-06, "loss": 0.1547, "step": 7106 }, { "epoch": 0.5673345573561108, "grad_norm": 0.2607386917289951, "learning_rate": 8.313332194527967e-06, "loss": 0.1574, "step": 7107 }, { "epoch": 0.5674143849285543, "grad_norm": 0.284051955924415, "learning_rate": 8.310783832313519e-06, "loss": 0.1556, "step": 7108 }, { "epoch": 0.5674942125009979, "grad_norm": 0.26287170755908673, "learning_rate": 8.308235583016481e-06, "loss": 0.1826, "step": 7109 }, { "epoch": 0.5675740400734414, "grad_norm": 0.3273816711700881, "learning_rate": 8.305687446807205e-06, "loss": 0.1536, "step": 7110 }, { "epoch": 0.5676538676458849, "grad_norm": 0.3332857548042333, "learning_rate": 8.303139423856015e-06, "loss": 0.1465, "step": 7111 }, { "epoch": 0.5677336952183284, "grad_norm": 0.31401938540636976, "learning_rate": 8.300591514333239e-06, "loss": 0.131, "step": 7112 }, { "epoch": 0.5678135227907719, "grad_norm": 0.3244308407170985, "learning_rate": 8.298043718409197e-06, "loss": 0.1691, "step": 7113 }, { "epoch": 0.5678933503632154, "grad_norm": 0.28075309699874706, "learning_rate": 8.295496036254194e-06, "loss": 0.1542, "step": 7114 }, { "epoch": 0.5679731779356589, "grad_norm": 0.32808488269599423, "learning_rate": 8.292948468038537e-06, "loss": 0.1942, "step": 7115 }, { "epoch": 0.5680530055081024, "grad_norm": 0.31522533943105396, "learning_rate": 8.29040101393252e-06, "loss": 0.1736, "step": 7116 }, { "epoch": 0.5681328330805461, "grad_norm": 0.26499973247233033, "learning_rate": 8.28785367410643e-06, "loss": 0.1328, "step": 7117 }, { "epoch": 0.5682126606529896, "grad_norm": 0.2550593689079235, "learning_rate": 8.285306448730544e-06, "loss": 0.1699, "step": 7118 }, { "epoch": 0.5682924882254331, "grad_norm": 0.2818688451977526, "learning_rate": 8.282759337975137e-06, "loss": 0.1842, "step": 7119 }, { "epoch": 0.5683723157978766, "grad_norm": 0.35187483298512173, "learning_rate": 8.280212342010478e-06, "loss": 0.1783, "step": 7120 }, { "epoch": 0.5684521433703201, "grad_norm": 0.3237028901197609, "learning_rate": 8.277665461006809e-06, "loss": 0.1538, "step": 7121 }, { "epoch": 0.5685319709427636, "grad_norm": 0.30879113169062916, "learning_rate": 8.275118695134394e-06, "loss": 0.1732, "step": 7122 }, { "epoch": 0.5686117985152072, "grad_norm": 0.29979886840597414, "learning_rate": 8.272572044563463e-06, "loss": 0.206, "step": 7123 }, { "epoch": 0.5686916260876507, "grad_norm": 0.3332542145527557, "learning_rate": 8.270025509464258e-06, "loss": 0.1218, "step": 7124 }, { "epoch": 0.5687714536600942, "grad_norm": 0.2665979666372306, "learning_rate": 8.267479090007001e-06, "loss": 0.1389, "step": 7125 }, { "epoch": 0.5688512812325377, "grad_norm": 0.3201388056022144, "learning_rate": 8.264932786361912e-06, "loss": 0.1679, "step": 7126 }, { "epoch": 0.5689311088049812, "grad_norm": 0.296324028367135, "learning_rate": 8.262386598699202e-06, "loss": 0.1924, "step": 7127 }, { "epoch": 0.5690109363774247, "grad_norm": 0.294875426041354, "learning_rate": 8.259840527189064e-06, "loss": 0.1738, "step": 7128 }, { "epoch": 0.5690907639498682, "grad_norm": 0.28348191717794596, "learning_rate": 8.257294572001707e-06, "loss": 0.1707, "step": 7129 }, { "epoch": 0.5691705915223118, "grad_norm": 0.28445218419083734, "learning_rate": 8.254748733307306e-06, "loss": 0.1739, "step": 7130 }, { "epoch": 0.5692504190947554, "grad_norm": 0.27285936048058007, "learning_rate": 8.25220301127605e-06, "loss": 0.1391, "step": 7131 }, { "epoch": 0.5693302466671989, "grad_norm": 0.33521649641470563, "learning_rate": 8.2496574060781e-06, "loss": 0.2202, "step": 7132 }, { "epoch": 0.5694100742396424, "grad_norm": 0.2752695191763218, "learning_rate": 8.247111917883634e-06, "loss": 0.1664, "step": 7133 }, { "epoch": 0.5694899018120859, "grad_norm": 0.28678891986299854, "learning_rate": 8.244566546862795e-06, "loss": 0.158, "step": 7134 }, { "epoch": 0.5695697293845294, "grad_norm": 0.2702446146568454, "learning_rate": 8.242021293185734e-06, "loss": 0.1403, "step": 7135 }, { "epoch": 0.569649556956973, "grad_norm": 0.28378104582681435, "learning_rate": 8.239476157022594e-06, "loss": 0.1653, "step": 7136 }, { "epoch": 0.5697293845294165, "grad_norm": 0.333957840940703, "learning_rate": 8.236931138543505e-06, "loss": 0.1558, "step": 7137 }, { "epoch": 0.56980921210186, "grad_norm": 0.24301881233878492, "learning_rate": 8.234386237918592e-06, "loss": 0.1495, "step": 7138 }, { "epoch": 0.5698890396743035, "grad_norm": 0.26131189152747614, "learning_rate": 8.231841455317972e-06, "loss": 0.1593, "step": 7139 }, { "epoch": 0.569968867246747, "grad_norm": 0.342595199244003, "learning_rate": 8.229296790911754e-06, "loss": 0.1737, "step": 7140 }, { "epoch": 0.5700486948191905, "grad_norm": 0.28694516147739835, "learning_rate": 8.226752244870038e-06, "loss": 0.1724, "step": 7141 }, { "epoch": 0.570128522391634, "grad_norm": 0.2767997063194204, "learning_rate": 8.224207817362917e-06, "loss": 0.1352, "step": 7142 }, { "epoch": 0.5702083499640775, "grad_norm": 0.27596386094562486, "learning_rate": 8.221663508560476e-06, "loss": 0.1882, "step": 7143 }, { "epoch": 0.5702881775365212, "grad_norm": 0.32066038227546134, "learning_rate": 8.21911931863279e-06, "loss": 0.1702, "step": 7144 }, { "epoch": 0.5703680051089647, "grad_norm": 0.30519871389586645, "learning_rate": 8.216575247749932e-06, "loss": 0.1518, "step": 7145 }, { "epoch": 0.5704478326814082, "grad_norm": 0.28712640793908006, "learning_rate": 8.214031296081959e-06, "loss": 0.1774, "step": 7146 }, { "epoch": 0.5705276602538517, "grad_norm": 0.338472906821917, "learning_rate": 8.211487463798928e-06, "loss": 0.1342, "step": 7147 }, { "epoch": 0.5706074878262952, "grad_norm": 0.2708533844974485, "learning_rate": 8.20894375107088e-06, "loss": 0.1306, "step": 7148 }, { "epoch": 0.5706873153987387, "grad_norm": 0.2821337179809365, "learning_rate": 8.206400158067857e-06, "loss": 0.1596, "step": 7149 }, { "epoch": 0.5707671429711823, "grad_norm": 0.2955287437060336, "learning_rate": 8.203856684959885e-06, "loss": 0.1996, "step": 7150 }, { "epoch": 0.5708469705436258, "grad_norm": 0.28610804305970644, "learning_rate": 8.201313331916982e-06, "loss": 0.1455, "step": 7151 }, { "epoch": 0.5709267981160693, "grad_norm": 0.27314687148417455, "learning_rate": 8.198770099109168e-06, "loss": 0.1287, "step": 7152 }, { "epoch": 0.5710066256885128, "grad_norm": 0.3239178365986709, "learning_rate": 8.196226986706443e-06, "loss": 0.2109, "step": 7153 }, { "epoch": 0.5710864532609563, "grad_norm": 0.2752846821608422, "learning_rate": 8.193683994878807e-06, "loss": 0.1418, "step": 7154 }, { "epoch": 0.5711662808333998, "grad_norm": 0.3419433840420225, "learning_rate": 8.191141123796246e-06, "loss": 0.2383, "step": 7155 }, { "epoch": 0.5712461084058433, "grad_norm": 0.3011639405640094, "learning_rate": 8.188598373628745e-06, "loss": 0.1542, "step": 7156 }, { "epoch": 0.5713259359782868, "grad_norm": 0.2605205526994135, "learning_rate": 8.186055744546273e-06, "loss": 0.165, "step": 7157 }, { "epoch": 0.5714057635507305, "grad_norm": 0.3873311094296855, "learning_rate": 8.183513236718795e-06, "loss": 0.2165, "step": 7158 }, { "epoch": 0.571485591123174, "grad_norm": 0.28407284003037303, "learning_rate": 8.18097085031627e-06, "loss": 0.1898, "step": 7159 }, { "epoch": 0.5715654186956175, "grad_norm": 0.3086681905900883, "learning_rate": 8.178428585508643e-06, "loss": 0.1738, "step": 7160 }, { "epoch": 0.571645246268061, "grad_norm": 0.267610482088496, "learning_rate": 8.175886442465857e-06, "loss": 0.1807, "step": 7161 }, { "epoch": 0.5717250738405045, "grad_norm": 0.28695626377895095, "learning_rate": 8.173344421357843e-06, "loss": 0.1902, "step": 7162 }, { "epoch": 0.571804901412948, "grad_norm": 0.28164589820430247, "learning_rate": 8.170802522354528e-06, "loss": 0.1749, "step": 7163 }, { "epoch": 0.5718847289853916, "grad_norm": 0.30209724598379845, "learning_rate": 8.168260745625827e-06, "loss": 0.1833, "step": 7164 }, { "epoch": 0.5719645565578351, "grad_norm": 0.27425188014268337, "learning_rate": 8.16571909134164e-06, "loss": 0.1352, "step": 7165 }, { "epoch": 0.5720443841302786, "grad_norm": 0.29235121331101493, "learning_rate": 8.163177559671879e-06, "loss": 0.219, "step": 7166 }, { "epoch": 0.5721242117027221, "grad_norm": 0.3143124911569375, "learning_rate": 8.160636150786421e-06, "loss": 0.1324, "step": 7167 }, { "epoch": 0.5722040392751656, "grad_norm": 0.3242272438268719, "learning_rate": 8.158094864855163e-06, "loss": 0.1648, "step": 7168 }, { "epoch": 0.5722838668476091, "grad_norm": 0.3233590502455959, "learning_rate": 8.155553702047973e-06, "loss": 0.1519, "step": 7169 }, { "epoch": 0.5723636944200526, "grad_norm": 0.3431688413558203, "learning_rate": 8.15301266253472e-06, "loss": 0.1605, "step": 7170 }, { "epoch": 0.5724435219924963, "grad_norm": 0.35334587565104897, "learning_rate": 8.150471746485263e-06, "loss": 0.17, "step": 7171 }, { "epoch": 0.5725233495649398, "grad_norm": 0.2702685542892262, "learning_rate": 8.147930954069445e-06, "loss": 0.1407, "step": 7172 }, { "epoch": 0.5726031771373833, "grad_norm": 0.311138948372886, "learning_rate": 8.14539028545712e-06, "loss": 0.2027, "step": 7173 }, { "epoch": 0.5726830047098268, "grad_norm": 0.2898214040414608, "learning_rate": 8.142849740818106e-06, "loss": 0.1217, "step": 7174 }, { "epoch": 0.5727628322822703, "grad_norm": 0.3398244246569573, "learning_rate": 8.140309320322245e-06, "loss": 0.1832, "step": 7175 }, { "epoch": 0.5728426598547138, "grad_norm": 0.3395534626059391, "learning_rate": 8.13776902413934e-06, "loss": 0.1869, "step": 7176 }, { "epoch": 0.5729224874271573, "grad_norm": 0.31448433845467366, "learning_rate": 8.135228852439213e-06, "loss": 0.1406, "step": 7177 }, { "epoch": 0.5730023149996009, "grad_norm": 0.28539147911700413, "learning_rate": 8.132688805391655e-06, "loss": 0.1671, "step": 7178 }, { "epoch": 0.5730821425720444, "grad_norm": 0.3447344270170826, "learning_rate": 8.13014888316646e-06, "loss": 0.1755, "step": 7179 }, { "epoch": 0.5731619701444879, "grad_norm": 0.3180680799824764, "learning_rate": 8.127609085933413e-06, "loss": 0.1666, "step": 7180 }, { "epoch": 0.5732417977169314, "grad_norm": 0.3724591527476726, "learning_rate": 8.125069413862285e-06, "loss": 0.1576, "step": 7181 }, { "epoch": 0.5733216252893749, "grad_norm": 0.3087313841320718, "learning_rate": 8.122529867122851e-06, "loss": 0.1667, "step": 7182 }, { "epoch": 0.5734014528618184, "grad_norm": 0.3118895334146445, "learning_rate": 8.119990445884862e-06, "loss": 0.1558, "step": 7183 }, { "epoch": 0.5734812804342619, "grad_norm": 0.3128181686622774, "learning_rate": 8.117451150318076e-06, "loss": 0.1387, "step": 7184 }, { "epoch": 0.5735611080067056, "grad_norm": 0.28679424376998436, "learning_rate": 8.114911980592226e-06, "loss": 0.1483, "step": 7185 }, { "epoch": 0.5736409355791491, "grad_norm": 0.3115282743903692, "learning_rate": 8.112372936877053e-06, "loss": 0.1752, "step": 7186 }, { "epoch": 0.5737207631515926, "grad_norm": 0.3384826794897947, "learning_rate": 8.10983401934228e-06, "loss": 0.1927, "step": 7187 }, { "epoch": 0.5738005907240361, "grad_norm": 0.23678936398434375, "learning_rate": 8.10729522815762e-06, "loss": 0.1851, "step": 7188 }, { "epoch": 0.5738804182964796, "grad_norm": 0.28172221138098763, "learning_rate": 8.104756563492787e-06, "loss": 0.1858, "step": 7189 }, { "epoch": 0.5739602458689231, "grad_norm": 0.3109609129717309, "learning_rate": 8.102218025517472e-06, "loss": 0.1894, "step": 7190 }, { "epoch": 0.5740400734413666, "grad_norm": 0.3099897967997182, "learning_rate": 8.099679614401376e-06, "loss": 0.1712, "step": 7191 }, { "epoch": 0.5741199010138102, "grad_norm": 0.3313441961391671, "learning_rate": 8.097141330314175e-06, "loss": 0.1394, "step": 7192 }, { "epoch": 0.5741997285862537, "grad_norm": 0.2878627540671653, "learning_rate": 8.094603173425548e-06, "loss": 0.132, "step": 7193 }, { "epoch": 0.5742795561586972, "grad_norm": 0.3074980760564911, "learning_rate": 8.09206514390516e-06, "loss": 0.2034, "step": 7194 }, { "epoch": 0.5743593837311407, "grad_norm": 0.2836859763543985, "learning_rate": 8.08952724192266e-06, "loss": 0.189, "step": 7195 }, { "epoch": 0.5744392113035842, "grad_norm": 0.3257343131778499, "learning_rate": 8.08698946764771e-06, "loss": 0.193, "step": 7196 }, { "epoch": 0.5745190388760277, "grad_norm": 0.29501694269076095, "learning_rate": 8.08445182124994e-06, "loss": 0.1676, "step": 7197 }, { "epoch": 0.5745988664484714, "grad_norm": 0.33042892703145954, "learning_rate": 8.081914302898987e-06, "loss": 0.1626, "step": 7198 }, { "epoch": 0.5746786940209149, "grad_norm": 0.2586629213934572, "learning_rate": 8.07937691276447e-06, "loss": 0.2053, "step": 7199 }, { "epoch": 0.5747585215933584, "grad_norm": 0.272414053718488, "learning_rate": 8.076839651016007e-06, "loss": 0.1646, "step": 7200 }, { "epoch": 0.5748383491658019, "grad_norm": 0.2689167650486151, "learning_rate": 8.074302517823203e-06, "loss": 0.2084, "step": 7201 }, { "epoch": 0.5749181767382454, "grad_norm": 0.26795356401626125, "learning_rate": 8.071765513355653e-06, "loss": 0.1442, "step": 7202 }, { "epoch": 0.5749980043106889, "grad_norm": 0.2652529281243139, "learning_rate": 8.06922863778295e-06, "loss": 0.1634, "step": 7203 }, { "epoch": 0.5750778318831324, "grad_norm": 0.24473348346641488, "learning_rate": 8.06669189127467e-06, "loss": 0.2299, "step": 7204 }, { "epoch": 0.575157659455576, "grad_norm": 0.26369269690697417, "learning_rate": 8.064155274000386e-06, "loss": 0.1807, "step": 7205 }, { "epoch": 0.5752374870280195, "grad_norm": 0.34068978242438974, "learning_rate": 8.06161878612966e-06, "loss": 0.1439, "step": 7206 }, { "epoch": 0.575317314600463, "grad_norm": 0.33622960681812475, "learning_rate": 8.059082427832052e-06, "loss": 0.1809, "step": 7207 }, { "epoch": 0.5753971421729065, "grad_norm": 0.3399065592417345, "learning_rate": 8.056546199277102e-06, "loss": 0.2308, "step": 7208 }, { "epoch": 0.57547696974535, "grad_norm": 0.31646048469088706, "learning_rate": 8.054010100634341e-06, "loss": 0.1608, "step": 7209 }, { "epoch": 0.5755567973177935, "grad_norm": 0.29163233011538175, "learning_rate": 8.051474132073312e-06, "loss": 0.191, "step": 7210 }, { "epoch": 0.575636624890237, "grad_norm": 0.2891448680945059, "learning_rate": 8.048938293763517e-06, "loss": 0.1653, "step": 7211 }, { "epoch": 0.5757164524626807, "grad_norm": 0.3236923369673199, "learning_rate": 8.046402585874484e-06, "loss": 0.1168, "step": 7212 }, { "epoch": 0.5757962800351242, "grad_norm": 0.2621549448947553, "learning_rate": 8.0438670085757e-06, "loss": 0.2174, "step": 7213 }, { "epoch": 0.5758761076075677, "grad_norm": 0.29190827972543304, "learning_rate": 8.04133156203667e-06, "loss": 0.1681, "step": 7214 }, { "epoch": 0.5759559351800112, "grad_norm": 0.2939755252782758, "learning_rate": 8.038796246426871e-06, "loss": 0.1977, "step": 7215 }, { "epoch": 0.5760357627524547, "grad_norm": 0.27052606349641695, "learning_rate": 8.036261061915784e-06, "loss": 0.1507, "step": 7216 }, { "epoch": 0.5761155903248982, "grad_norm": 0.26583372149876877, "learning_rate": 8.033726008672876e-06, "loss": 0.158, "step": 7217 }, { "epoch": 0.5761954178973417, "grad_norm": 0.2979732965099187, "learning_rate": 8.031191086867597e-06, "loss": 0.1152, "step": 7218 }, { "epoch": 0.5762752454697853, "grad_norm": 0.4277745621613427, "learning_rate": 8.028656296669408e-06, "loss": 0.1313, "step": 7219 }, { "epoch": 0.5763550730422288, "grad_norm": 0.3335721449351005, "learning_rate": 8.026121638247739e-06, "loss": 0.1447, "step": 7220 }, { "epoch": 0.5764349006146723, "grad_norm": 0.30378814581675667, "learning_rate": 8.023587111772032e-06, "loss": 0.2083, "step": 7221 }, { "epoch": 0.5765147281871158, "grad_norm": 0.3062337596687288, "learning_rate": 8.0210527174117e-06, "loss": 0.1762, "step": 7222 }, { "epoch": 0.5765945557595593, "grad_norm": 0.31235045932347594, "learning_rate": 8.018518455336168e-06, "loss": 0.1567, "step": 7223 }, { "epoch": 0.5766743833320028, "grad_norm": 0.33579750442260137, "learning_rate": 8.015984325714831e-06, "loss": 0.1665, "step": 7224 }, { "epoch": 0.5767542109044463, "grad_norm": 0.2823284243327088, "learning_rate": 8.013450328717092e-06, "loss": 0.1699, "step": 7225 }, { "epoch": 0.57683403847689, "grad_norm": 0.29429411420508506, "learning_rate": 8.010916464512337e-06, "loss": 0.134, "step": 7226 }, { "epoch": 0.5769138660493335, "grad_norm": 0.28845048780690846, "learning_rate": 8.008382733269945e-06, "loss": 0.1906, "step": 7227 }, { "epoch": 0.576993693621777, "grad_norm": 0.22841205190805527, "learning_rate": 8.005849135159285e-06, "loss": 0.148, "step": 7228 }, { "epoch": 0.5770735211942205, "grad_norm": 0.3558541878524224, "learning_rate": 8.003315670349716e-06, "loss": 0.1688, "step": 7229 }, { "epoch": 0.577153348766664, "grad_norm": 0.3031837205253596, "learning_rate": 8.000782339010597e-06, "loss": 0.176, "step": 7230 }, { "epoch": 0.5772331763391075, "grad_norm": 0.3247530541426195, "learning_rate": 7.998249141311265e-06, "loss": 0.1908, "step": 7231 }, { "epoch": 0.577313003911551, "grad_norm": 0.281048509895512, "learning_rate": 7.995716077421057e-06, "loss": 0.1867, "step": 7232 }, { "epoch": 0.5773928314839946, "grad_norm": 0.294003095985435, "learning_rate": 7.993183147509297e-06, "loss": 0.1924, "step": 7233 }, { "epoch": 0.5774726590564381, "grad_norm": 0.278547703026987, "learning_rate": 7.9906503517453e-06, "loss": 0.181, "step": 7234 }, { "epoch": 0.5775524866288816, "grad_norm": 0.32858072288932993, "learning_rate": 7.988117690298378e-06, "loss": 0.1558, "step": 7235 }, { "epoch": 0.5776323142013251, "grad_norm": 0.2794788806536367, "learning_rate": 7.985585163337823e-06, "loss": 0.2199, "step": 7236 }, { "epoch": 0.5777121417737686, "grad_norm": 0.2564412528354114, "learning_rate": 7.983052771032932e-06, "loss": 0.1539, "step": 7237 }, { "epoch": 0.5777919693462121, "grad_norm": 0.26188103377155014, "learning_rate": 7.98052051355298e-06, "loss": 0.168, "step": 7238 }, { "epoch": 0.5778717969186558, "grad_norm": 0.3024264771674326, "learning_rate": 7.977988391067239e-06, "loss": 0.1555, "step": 7239 }, { "epoch": 0.5779516244910993, "grad_norm": 0.3435512260699934, "learning_rate": 7.975456403744973e-06, "loss": 0.1414, "step": 7240 }, { "epoch": 0.5780314520635428, "grad_norm": 0.3042928684840874, "learning_rate": 7.972924551755432e-06, "loss": 0.1495, "step": 7241 }, { "epoch": 0.5781112796359863, "grad_norm": 0.31842152822245146, "learning_rate": 7.970392835267867e-06, "loss": 0.1792, "step": 7242 }, { "epoch": 0.5781911072084298, "grad_norm": 0.2663786835348056, "learning_rate": 7.967861254451503e-06, "loss": 0.1021, "step": 7243 }, { "epoch": 0.5782709347808733, "grad_norm": 0.2509023377212503, "learning_rate": 7.965329809475577e-06, "loss": 0.1998, "step": 7244 }, { "epoch": 0.5783507623533168, "grad_norm": 0.26452457876637087, "learning_rate": 7.962798500509301e-06, "loss": 0.2144, "step": 7245 }, { "epoch": 0.5784305899257604, "grad_norm": 0.3391796068298732, "learning_rate": 7.960267327721881e-06, "loss": 0.136, "step": 7246 }, { "epoch": 0.5785104174982039, "grad_norm": 0.27267801244425294, "learning_rate": 7.95773629128252e-06, "loss": 0.1894, "step": 7247 }, { "epoch": 0.5785902450706474, "grad_norm": 0.3217457173348134, "learning_rate": 7.955205391360403e-06, "loss": 0.1925, "step": 7248 }, { "epoch": 0.5786700726430909, "grad_norm": 0.3058218520391946, "learning_rate": 7.952674628124715e-06, "loss": 0.1647, "step": 7249 }, { "epoch": 0.5787499002155344, "grad_norm": 0.28139159021023735, "learning_rate": 7.950144001744626e-06, "loss": 0.1639, "step": 7250 }, { "epoch": 0.5788297277879779, "grad_norm": 0.29647310833253926, "learning_rate": 7.947613512389299e-06, "loss": 0.188, "step": 7251 }, { "epoch": 0.5789095553604214, "grad_norm": 0.42907839511643725, "learning_rate": 7.945083160227886e-06, "loss": 0.1902, "step": 7252 }, { "epoch": 0.5789893829328651, "grad_norm": 0.30612585756198146, "learning_rate": 7.942552945429532e-06, "loss": 0.14, "step": 7253 }, { "epoch": 0.5790692105053086, "grad_norm": 0.3207718860840102, "learning_rate": 7.940022868163374e-06, "loss": 0.1858, "step": 7254 }, { "epoch": 0.5791490380777521, "grad_norm": 0.274839840052261, "learning_rate": 7.93749292859853e-06, "loss": 0.1999, "step": 7255 }, { "epoch": 0.5792288656501956, "grad_norm": 0.2956626693496442, "learning_rate": 7.934963126904126e-06, "loss": 0.1802, "step": 7256 }, { "epoch": 0.5793086932226391, "grad_norm": 0.2868713628356371, "learning_rate": 7.932433463249259e-06, "loss": 0.134, "step": 7257 }, { "epoch": 0.5793885207950826, "grad_norm": 0.2740561513472609, "learning_rate": 7.92990393780304e-06, "loss": 0.1837, "step": 7258 }, { "epoch": 0.5794683483675261, "grad_norm": 0.27501346043597935, "learning_rate": 7.927374550734543e-06, "loss": 0.178, "step": 7259 }, { "epoch": 0.5795481759399697, "grad_norm": 0.28196165021873076, "learning_rate": 7.924845302212859e-06, "loss": 0.1774, "step": 7260 }, { "epoch": 0.5796280035124132, "grad_norm": 0.32817174770376073, "learning_rate": 7.922316192407057e-06, "loss": 0.1589, "step": 7261 }, { "epoch": 0.5797078310848567, "grad_norm": 0.34017941623884546, "learning_rate": 7.919787221486188e-06, "loss": 0.1964, "step": 7262 }, { "epoch": 0.5797876586573002, "grad_norm": 0.2889767587001772, "learning_rate": 7.91725838961932e-06, "loss": 0.1601, "step": 7263 }, { "epoch": 0.5798674862297437, "grad_norm": 0.3356665355778648, "learning_rate": 7.914729696975477e-06, "loss": 0.1979, "step": 7264 }, { "epoch": 0.5799473138021872, "grad_norm": 0.3225394934443617, "learning_rate": 7.91220114372371e-06, "loss": 0.1161, "step": 7265 }, { "epoch": 0.5800271413746309, "grad_norm": 0.3041345082590708, "learning_rate": 7.909672730033026e-06, "loss": 0.2285, "step": 7266 }, { "epoch": 0.5801069689470744, "grad_norm": 0.265660272970919, "learning_rate": 7.907144456072456e-06, "loss": 0.1361, "step": 7267 }, { "epoch": 0.5801867965195179, "grad_norm": 0.3053265848253605, "learning_rate": 7.904616322010992e-06, "loss": 0.1843, "step": 7268 }, { "epoch": 0.5802666240919614, "grad_norm": 0.25912357261539526, "learning_rate": 7.902088328017635e-06, "loss": 0.1543, "step": 7269 }, { "epoch": 0.5803464516644049, "grad_norm": 0.2561152202692077, "learning_rate": 7.899560474261372e-06, "loss": 0.1524, "step": 7270 }, { "epoch": 0.5804262792368484, "grad_norm": 0.2849260824827093, "learning_rate": 7.897032760911177e-06, "loss": 0.1392, "step": 7271 }, { "epoch": 0.5805061068092919, "grad_norm": 0.2850308446541978, "learning_rate": 7.894505188136022e-06, "loss": 0.1737, "step": 7272 }, { "epoch": 0.5805859343817354, "grad_norm": 0.26433382032640595, "learning_rate": 7.89197775610486e-06, "loss": 0.1834, "step": 7273 }, { "epoch": 0.580665761954179, "grad_norm": 0.30354074515742296, "learning_rate": 7.889450464986645e-06, "loss": 0.1642, "step": 7274 }, { "epoch": 0.5807455895266225, "grad_norm": 0.281081771849322, "learning_rate": 7.886923314950316e-06, "loss": 0.1827, "step": 7275 }, { "epoch": 0.580825417099066, "grad_norm": 0.28029420138615585, "learning_rate": 7.884396306164796e-06, "loss": 0.1604, "step": 7276 }, { "epoch": 0.5809052446715095, "grad_norm": 0.28517844392041225, "learning_rate": 7.881869438799015e-06, "loss": 0.139, "step": 7277 }, { "epoch": 0.580985072243953, "grad_norm": 0.3519540512164097, "learning_rate": 7.879342713021875e-06, "loss": 0.1244, "step": 7278 }, { "epoch": 0.5810648998163965, "grad_norm": 0.31309615264142887, "learning_rate": 7.876816129002287e-06, "loss": 0.1452, "step": 7279 }, { "epoch": 0.5811447273888402, "grad_norm": 0.27395209757861533, "learning_rate": 7.874289686909136e-06, "loss": 0.1958, "step": 7280 }, { "epoch": 0.5812245549612837, "grad_norm": 0.2727744347115658, "learning_rate": 7.871763386911308e-06, "loss": 0.1389, "step": 7281 }, { "epoch": 0.5813043825337272, "grad_norm": 0.26038637004731846, "learning_rate": 7.869237229177673e-06, "loss": 0.1244, "step": 7282 }, { "epoch": 0.5813842101061707, "grad_norm": 0.31424036104861924, "learning_rate": 7.8667112138771e-06, "loss": 0.1284, "step": 7283 }, { "epoch": 0.5814640376786142, "grad_norm": 0.29489753325151946, "learning_rate": 7.86418534117844e-06, "loss": 0.1829, "step": 7284 }, { "epoch": 0.5815438652510577, "grad_norm": 0.2848902820140209, "learning_rate": 7.861659611250534e-06, "loss": 0.1277, "step": 7285 }, { "epoch": 0.5816236928235012, "grad_norm": 0.31305334798728535, "learning_rate": 7.859134024262226e-06, "loss": 0.1616, "step": 7286 }, { "epoch": 0.5817035203959448, "grad_norm": 0.2770575231849585, "learning_rate": 7.856608580382331e-06, "loss": 0.1635, "step": 7287 }, { "epoch": 0.5817833479683883, "grad_norm": 0.27680332106098454, "learning_rate": 7.854083279779673e-06, "loss": 0.218, "step": 7288 }, { "epoch": 0.5818631755408318, "grad_norm": 0.2686025864248572, "learning_rate": 7.851558122623054e-06, "loss": 0.1402, "step": 7289 }, { "epoch": 0.5819430031132753, "grad_norm": 0.2625140025480703, "learning_rate": 7.849033109081275e-06, "loss": 0.1732, "step": 7290 }, { "epoch": 0.5820228306857188, "grad_norm": 0.26803843067589134, "learning_rate": 7.846508239323119e-06, "loss": 0.2154, "step": 7291 }, { "epoch": 0.5821026582581623, "grad_norm": 0.3188847765725624, "learning_rate": 7.843983513517363e-06, "loss": 0.1733, "step": 7292 }, { "epoch": 0.582182485830606, "grad_norm": 0.33998351693687034, "learning_rate": 7.841458931832778e-06, "loss": 0.1757, "step": 7293 }, { "epoch": 0.5822623134030495, "grad_norm": 0.30688062650283665, "learning_rate": 7.83893449443812e-06, "loss": 0.2203, "step": 7294 }, { "epoch": 0.582342140975493, "grad_norm": 0.25032812560787493, "learning_rate": 7.83641020150214e-06, "loss": 0.17, "step": 7295 }, { "epoch": 0.5824219685479365, "grad_norm": 0.2654921475577913, "learning_rate": 7.833886053193576e-06, "loss": 0.1362, "step": 7296 }, { "epoch": 0.58250179612038, "grad_norm": 0.2817755560256867, "learning_rate": 7.831362049681158e-06, "loss": 0.1535, "step": 7297 }, { "epoch": 0.5825816236928235, "grad_norm": 0.29689081502742787, "learning_rate": 7.828838191133607e-06, "loss": 0.1852, "step": 7298 }, { "epoch": 0.582661451265267, "grad_norm": 0.2665271307458513, "learning_rate": 7.826314477719625e-06, "loss": 0.148, "step": 7299 }, { "epoch": 0.5827412788377105, "grad_norm": 0.24083383794584934, "learning_rate": 7.823790909607922e-06, "loss": 0.1299, "step": 7300 }, { "epoch": 0.582821106410154, "grad_norm": 0.2604845428670207, "learning_rate": 7.82126748696718e-06, "loss": 0.1441, "step": 7301 }, { "epoch": 0.5829009339825976, "grad_norm": 0.2755633625445855, "learning_rate": 7.81874420996609e-06, "loss": 0.1693, "step": 7302 }, { "epoch": 0.5829807615550411, "grad_norm": 0.24010102434149658, "learning_rate": 7.816221078773313e-06, "loss": 0.1821, "step": 7303 }, { "epoch": 0.5830605891274846, "grad_norm": 0.3084023990675439, "learning_rate": 7.813698093557519e-06, "loss": 0.2072, "step": 7304 }, { "epoch": 0.5831404166999281, "grad_norm": 0.30245045384715047, "learning_rate": 7.811175254487353e-06, "loss": 0.1238, "step": 7305 }, { "epoch": 0.5832202442723716, "grad_norm": 0.3139341755320558, "learning_rate": 7.808652561731455e-06, "loss": 0.1789, "step": 7306 }, { "epoch": 0.5833000718448152, "grad_norm": 0.3142300098118761, "learning_rate": 7.806130015458468e-06, "loss": 0.1779, "step": 7307 }, { "epoch": 0.5833798994172588, "grad_norm": 0.3191773762588665, "learning_rate": 7.803607615837002e-06, "loss": 0.2082, "step": 7308 }, { "epoch": 0.5834597269897023, "grad_norm": 0.31971588232596904, "learning_rate": 7.801085363035681e-06, "loss": 0.2233, "step": 7309 }, { "epoch": 0.5835395545621458, "grad_norm": 0.3001732498347806, "learning_rate": 7.798563257223093e-06, "loss": 0.1703, "step": 7310 }, { "epoch": 0.5836193821345893, "grad_norm": 0.2647068331673552, "learning_rate": 7.796041298567848e-06, "loss": 0.163, "step": 7311 }, { "epoch": 0.5836992097070328, "grad_norm": 0.29814011867819196, "learning_rate": 7.793519487238516e-06, "loss": 0.1693, "step": 7312 }, { "epoch": 0.5837790372794763, "grad_norm": 0.40660769515930567, "learning_rate": 7.790997823403675e-06, "loss": 0.1718, "step": 7313 }, { "epoch": 0.5838588648519198, "grad_norm": 0.30838569756370404, "learning_rate": 7.788476307231887e-06, "loss": 0.1609, "step": 7314 }, { "epoch": 0.5839386924243634, "grad_norm": 0.2552992741188371, "learning_rate": 7.785954938891704e-06, "loss": 0.1765, "step": 7315 }, { "epoch": 0.5840185199968069, "grad_norm": 0.2929276189106103, "learning_rate": 7.783433718551676e-06, "loss": 0.1795, "step": 7316 }, { "epoch": 0.5840983475692504, "grad_norm": 0.3564830124604284, "learning_rate": 7.780912646380327e-06, "loss": 0.1819, "step": 7317 }, { "epoch": 0.5841781751416939, "grad_norm": 0.33618442205451643, "learning_rate": 7.77839172254619e-06, "loss": 0.1906, "step": 7318 }, { "epoch": 0.5842580027141374, "grad_norm": 0.32610695627649006, "learning_rate": 7.77587094721777e-06, "loss": 0.1942, "step": 7319 }, { "epoch": 0.584337830286581, "grad_norm": 0.3037986712556624, "learning_rate": 7.77335032056358e-06, "loss": 0.1525, "step": 7320 }, { "epoch": 0.5844176578590246, "grad_norm": 0.2849628611646732, "learning_rate": 7.770829842752108e-06, "loss": 0.184, "step": 7321 }, { "epoch": 0.5844974854314681, "grad_norm": 0.24784515616221717, "learning_rate": 7.768309513951838e-06, "loss": 0.1588, "step": 7322 }, { "epoch": 0.5845773130039116, "grad_norm": 0.2879492526881351, "learning_rate": 7.765789334331246e-06, "loss": 0.1915, "step": 7323 }, { "epoch": 0.5846571405763551, "grad_norm": 0.3800560571160412, "learning_rate": 7.763269304058793e-06, "loss": 0.1412, "step": 7324 }, { "epoch": 0.5847369681487986, "grad_norm": 0.24144463783214845, "learning_rate": 7.760749423302939e-06, "loss": 0.1744, "step": 7325 }, { "epoch": 0.5848167957212421, "grad_norm": 0.33372820968736033, "learning_rate": 7.758229692232122e-06, "loss": 0.1296, "step": 7326 }, { "epoch": 0.5848966232936856, "grad_norm": 0.27440484272643567, "learning_rate": 7.75571011101478e-06, "loss": 0.1564, "step": 7327 }, { "epoch": 0.5849764508661291, "grad_norm": 0.3604967682409451, "learning_rate": 7.753190679819336e-06, "loss": 0.1873, "step": 7328 }, { "epoch": 0.5850562784385727, "grad_norm": 0.2510910338407596, "learning_rate": 7.7506713988142e-06, "loss": 0.1246, "step": 7329 }, { "epoch": 0.5851361060110162, "grad_norm": 0.26891248513359706, "learning_rate": 7.748152268167784e-06, "loss": 0.1871, "step": 7330 }, { "epoch": 0.5852159335834597, "grad_norm": 0.29197512164144074, "learning_rate": 7.745633288048473e-06, "loss": 0.1742, "step": 7331 }, { "epoch": 0.5852957611559032, "grad_norm": 0.276647413651379, "learning_rate": 7.743114458624658e-06, "loss": 0.1709, "step": 7332 }, { "epoch": 0.5853755887283467, "grad_norm": 0.2757091693969009, "learning_rate": 7.740595780064708e-06, "loss": 0.1542, "step": 7333 }, { "epoch": 0.5854554163007903, "grad_norm": 0.2861468390951495, "learning_rate": 7.738077252536992e-06, "loss": 0.1485, "step": 7334 }, { "epoch": 0.5855352438732339, "grad_norm": 0.28277636159308817, "learning_rate": 7.735558876209859e-06, "loss": 0.1575, "step": 7335 }, { "epoch": 0.5856150714456774, "grad_norm": 0.2830059486982437, "learning_rate": 7.73304065125165e-06, "loss": 0.1621, "step": 7336 }, { "epoch": 0.5856948990181209, "grad_norm": 0.28198578588042544, "learning_rate": 7.730522577830706e-06, "loss": 0.1759, "step": 7337 }, { "epoch": 0.5857747265905644, "grad_norm": 0.29567311957774733, "learning_rate": 7.728004656115345e-06, "loss": 0.1877, "step": 7338 }, { "epoch": 0.5858545541630079, "grad_norm": 0.29646626463695136, "learning_rate": 7.725486886273882e-06, "loss": 0.1162, "step": 7339 }, { "epoch": 0.5859343817354514, "grad_norm": 0.2788344477124566, "learning_rate": 7.72296926847462e-06, "loss": 0.157, "step": 7340 }, { "epoch": 0.5860142093078949, "grad_norm": 0.3023990316609378, "learning_rate": 7.72045180288585e-06, "loss": 0.1852, "step": 7341 }, { "epoch": 0.5860940368803385, "grad_norm": 0.3223154410136279, "learning_rate": 7.71793448967586e-06, "loss": 0.1772, "step": 7342 }, { "epoch": 0.586173864452782, "grad_norm": 0.30683472612275464, "learning_rate": 7.715417329012912e-06, "loss": 0.1964, "step": 7343 }, { "epoch": 0.5862536920252255, "grad_norm": 0.28520778814804937, "learning_rate": 7.71290032106528e-06, "loss": 0.1562, "step": 7344 }, { "epoch": 0.586333519597669, "grad_norm": 0.2294695615890225, "learning_rate": 7.710383466001205e-06, "loss": 0.1503, "step": 7345 }, { "epoch": 0.5864133471701125, "grad_norm": 0.25587778840135345, "learning_rate": 7.70786676398894e-06, "loss": 0.1657, "step": 7346 }, { "epoch": 0.5864931747425561, "grad_norm": 0.2708862706873399, "learning_rate": 7.705350215196705e-06, "loss": 0.149, "step": 7347 }, { "epoch": 0.5865730023149996, "grad_norm": 0.2723829412654927, "learning_rate": 7.702833819792734e-06, "loss": 0.1825, "step": 7348 }, { "epoch": 0.5866528298874432, "grad_norm": 0.28257959207547256, "learning_rate": 7.700317577945223e-06, "loss": 0.1429, "step": 7349 }, { "epoch": 0.5867326574598867, "grad_norm": 0.27902582179943963, "learning_rate": 7.697801489822388e-06, "loss": 0.1748, "step": 7350 }, { "epoch": 0.5868124850323302, "grad_norm": 0.27196701387442457, "learning_rate": 7.695285555592412e-06, "loss": 0.2613, "step": 7351 }, { "epoch": 0.5868923126047737, "grad_norm": 0.2868231045232864, "learning_rate": 7.692769775423471e-06, "loss": 0.1312, "step": 7352 }, { "epoch": 0.5869721401772172, "grad_norm": 0.31004999332662964, "learning_rate": 7.690254149483745e-06, "loss": 0.145, "step": 7353 }, { "epoch": 0.5870519677496607, "grad_norm": 0.2935710655677261, "learning_rate": 7.687738677941383e-06, "loss": 0.1628, "step": 7354 }, { "epoch": 0.5871317953221042, "grad_norm": 0.325408049091045, "learning_rate": 7.685223360964546e-06, "loss": 0.1574, "step": 7355 }, { "epoch": 0.5872116228945478, "grad_norm": 0.339667470227745, "learning_rate": 7.682708198721362e-06, "loss": 0.1422, "step": 7356 }, { "epoch": 0.5872914504669913, "grad_norm": 0.3507362510922045, "learning_rate": 7.680193191379968e-06, "loss": 0.1655, "step": 7357 }, { "epoch": 0.5873712780394348, "grad_norm": 0.2847993145025447, "learning_rate": 7.67767833910848e-06, "loss": 0.1285, "step": 7358 }, { "epoch": 0.5874511056118783, "grad_norm": 0.25077273880053547, "learning_rate": 7.675163642075e-06, "loss": 0.162, "step": 7359 }, { "epoch": 0.5875309331843218, "grad_norm": 0.28638754679623957, "learning_rate": 7.672649100447632e-06, "loss": 0.1448, "step": 7360 }, { "epoch": 0.5876107607567654, "grad_norm": 0.300799483304056, "learning_rate": 7.67013471439446e-06, "loss": 0.1528, "step": 7361 }, { "epoch": 0.587690588329209, "grad_norm": 0.31122743511490475, "learning_rate": 7.667620484083565e-06, "loss": 0.241, "step": 7362 }, { "epoch": 0.5877704159016525, "grad_norm": 0.28056918986365925, "learning_rate": 7.665106409683008e-06, "loss": 0.175, "step": 7363 }, { "epoch": 0.587850243474096, "grad_norm": 0.25922715341178726, "learning_rate": 7.662592491360848e-06, "loss": 0.2211, "step": 7364 }, { "epoch": 0.5879300710465395, "grad_norm": 0.3286698205249925, "learning_rate": 7.660078729285132e-06, "loss": 0.1761, "step": 7365 }, { "epoch": 0.588009898618983, "grad_norm": 0.30136899948194956, "learning_rate": 7.65756512362389e-06, "loss": 0.169, "step": 7366 }, { "epoch": 0.5880897261914265, "grad_norm": 0.26830278248893824, "learning_rate": 7.65505167454515e-06, "loss": 0.2028, "step": 7367 }, { "epoch": 0.58816955376387, "grad_norm": 0.27994665808643476, "learning_rate": 7.652538382216925e-06, "loss": 0.1536, "step": 7368 }, { "epoch": 0.5882493813363135, "grad_norm": 0.34582568372678035, "learning_rate": 7.65002524680722e-06, "loss": 0.1874, "step": 7369 }, { "epoch": 0.5883292089087571, "grad_norm": 0.29694527565437895, "learning_rate": 7.647512268484027e-06, "loss": 0.1764, "step": 7370 }, { "epoch": 0.5884090364812006, "grad_norm": 0.2715045346335443, "learning_rate": 7.64499944741533e-06, "loss": 0.2285, "step": 7371 }, { "epoch": 0.5884888640536441, "grad_norm": 0.2665786746347637, "learning_rate": 7.6424867837691e-06, "loss": 0.1694, "step": 7372 }, { "epoch": 0.5885686916260876, "grad_norm": 0.31068553268158106, "learning_rate": 7.639974277713298e-06, "loss": 0.1923, "step": 7373 }, { "epoch": 0.5886485191985311, "grad_norm": 0.31101062334516166, "learning_rate": 7.637461929415878e-06, "loss": 0.1809, "step": 7374 }, { "epoch": 0.5887283467709747, "grad_norm": 0.2711527314807433, "learning_rate": 7.634949739044775e-06, "loss": 0.1679, "step": 7375 }, { "epoch": 0.5888081743434183, "grad_norm": 0.292442517512908, "learning_rate": 7.632437706767926e-06, "loss": 0.2244, "step": 7376 }, { "epoch": 0.5888880019158618, "grad_norm": 0.2614829754165191, "learning_rate": 7.629925832753244e-06, "loss": 0.1248, "step": 7377 }, { "epoch": 0.5889678294883053, "grad_norm": 0.25394526892512764, "learning_rate": 7.627414117168644e-06, "loss": 0.178, "step": 7378 }, { "epoch": 0.5890476570607488, "grad_norm": 0.30063844333201106, "learning_rate": 7.624902560182017e-06, "loss": 0.1857, "step": 7379 }, { "epoch": 0.5891274846331923, "grad_norm": 0.2901426549468736, "learning_rate": 7.6223911619612575e-06, "loss": 0.1925, "step": 7380 }, { "epoch": 0.5892073122056358, "grad_norm": 0.32980169659146286, "learning_rate": 7.619879922674239e-06, "loss": 0.2049, "step": 7381 }, { "epoch": 0.5892871397780793, "grad_norm": 0.2728457240332767, "learning_rate": 7.617368842488828e-06, "loss": 0.1558, "step": 7382 }, { "epoch": 0.5893669673505229, "grad_norm": 0.30203677533612383, "learning_rate": 7.614857921572881e-06, "loss": 0.1871, "step": 7383 }, { "epoch": 0.5894467949229664, "grad_norm": 0.3075106493123608, "learning_rate": 7.6123471600942425e-06, "loss": 0.1492, "step": 7384 }, { "epoch": 0.5895266224954099, "grad_norm": 0.2665890297519255, "learning_rate": 7.609836558220749e-06, "loss": 0.1366, "step": 7385 }, { "epoch": 0.5896064500678534, "grad_norm": 0.2550508140558276, "learning_rate": 7.60732611612022e-06, "loss": 0.1683, "step": 7386 }, { "epoch": 0.5896862776402969, "grad_norm": 0.2513584054589779, "learning_rate": 7.604815833960474e-06, "loss": 0.1582, "step": 7387 }, { "epoch": 0.5897661052127405, "grad_norm": 0.31964087690476944, "learning_rate": 7.602305711909313e-06, "loss": 0.1821, "step": 7388 }, { "epoch": 0.589845932785184, "grad_norm": 0.266683424591509, "learning_rate": 7.599795750134519e-06, "loss": 0.1547, "step": 7389 }, { "epoch": 0.5899257603576276, "grad_norm": 0.312979228560444, "learning_rate": 7.597285948803887e-06, "loss": 0.1814, "step": 7390 }, { "epoch": 0.5900055879300711, "grad_norm": 0.264197918953895, "learning_rate": 7.594776308085176e-06, "loss": 0.1787, "step": 7391 }, { "epoch": 0.5900854155025146, "grad_norm": 0.27869562188263475, "learning_rate": 7.592266828146155e-06, "loss": 0.1736, "step": 7392 }, { "epoch": 0.5901652430749581, "grad_norm": 0.2818641843076598, "learning_rate": 7.589757509154563e-06, "loss": 0.1713, "step": 7393 }, { "epoch": 0.5902450706474016, "grad_norm": 0.2979281152032698, "learning_rate": 7.587248351278149e-06, "loss": 0.1615, "step": 7394 }, { "epoch": 0.5903248982198451, "grad_norm": 0.32270883081382334, "learning_rate": 7.584739354684631e-06, "loss": 0.173, "step": 7395 }, { "epoch": 0.5904047257922886, "grad_norm": 0.3277084410876403, "learning_rate": 7.582230519541729e-06, "loss": 0.1561, "step": 7396 }, { "epoch": 0.5904845533647322, "grad_norm": 0.3061975146894509, "learning_rate": 7.579721846017149e-06, "loss": 0.1177, "step": 7397 }, { "epoch": 0.5905643809371757, "grad_norm": 0.2682976919537171, "learning_rate": 7.577213334278583e-06, "loss": 0.1676, "step": 7398 }, { "epoch": 0.5906442085096192, "grad_norm": 0.31757095449250866, "learning_rate": 7.574704984493724e-06, "loss": 0.1771, "step": 7399 }, { "epoch": 0.5907240360820627, "grad_norm": 0.28827191948039677, "learning_rate": 7.572196796830231e-06, "loss": 0.2161, "step": 7400 }, { "epoch": 0.5908038636545062, "grad_norm": 0.2838196730623278, "learning_rate": 7.5696887714557805e-06, "loss": 0.1314, "step": 7401 }, { "epoch": 0.5908836912269498, "grad_norm": 0.28551016223481984, "learning_rate": 7.567180908538017e-06, "loss": 0.1387, "step": 7402 }, { "epoch": 0.5909635187993934, "grad_norm": 0.2700540929245376, "learning_rate": 7.56467320824458e-06, "loss": 0.206, "step": 7403 }, { "epoch": 0.5910433463718369, "grad_norm": 0.310529318443605, "learning_rate": 7.562165670743102e-06, "loss": 0.1788, "step": 7404 }, { "epoch": 0.5911231739442804, "grad_norm": 0.3049815661145874, "learning_rate": 7.5596582962012e-06, "loss": 0.1662, "step": 7405 }, { "epoch": 0.5912030015167239, "grad_norm": 0.28327830181191066, "learning_rate": 7.557151084786487e-06, "loss": 0.1284, "step": 7406 }, { "epoch": 0.5912828290891674, "grad_norm": 0.30174233838969583, "learning_rate": 7.554644036666553e-06, "loss": 0.1364, "step": 7407 }, { "epoch": 0.5913626566616109, "grad_norm": 0.3796188406874718, "learning_rate": 7.552137152008989e-06, "loss": 0.143, "step": 7408 }, { "epoch": 0.5914424842340544, "grad_norm": 0.3272136758148725, "learning_rate": 7.549630430981372e-06, "loss": 0.1323, "step": 7409 }, { "epoch": 0.591522311806498, "grad_norm": 0.39817440774603663, "learning_rate": 7.547123873751259e-06, "loss": 0.185, "step": 7410 }, { "epoch": 0.5916021393789415, "grad_norm": 0.261433352364528, "learning_rate": 7.544617480486212e-06, "loss": 0.1449, "step": 7411 }, { "epoch": 0.591681966951385, "grad_norm": 0.28883680513775445, "learning_rate": 7.5421112513537654e-06, "loss": 0.1565, "step": 7412 }, { "epoch": 0.5917617945238285, "grad_norm": 0.2915997814528434, "learning_rate": 7.539605186521457e-06, "loss": 0.1771, "step": 7413 }, { "epoch": 0.591841622096272, "grad_norm": 0.2637627678213216, "learning_rate": 7.537099286156804e-06, "loss": 0.2186, "step": 7414 }, { "epoch": 0.5919214496687156, "grad_norm": 0.2834048532452859, "learning_rate": 7.534593550427319e-06, "loss": 0.1473, "step": 7415 }, { "epoch": 0.5920012772411591, "grad_norm": 0.3205522164517078, "learning_rate": 7.5320879795004965e-06, "loss": 0.1929, "step": 7416 }, { "epoch": 0.5920811048136027, "grad_norm": 0.2889674121238959, "learning_rate": 7.529582573543827e-06, "loss": 0.134, "step": 7417 }, { "epoch": 0.5921609323860462, "grad_norm": 0.27820707982298787, "learning_rate": 7.527077332724787e-06, "loss": 0.1192, "step": 7418 }, { "epoch": 0.5922407599584897, "grad_norm": 0.291177709634549, "learning_rate": 7.524572257210838e-06, "loss": 0.1447, "step": 7419 }, { "epoch": 0.5923205875309332, "grad_norm": 0.25894891582938495, "learning_rate": 7.52206734716944e-06, "loss": 0.2292, "step": 7420 }, { "epoch": 0.5924004151033767, "grad_norm": 0.3105312252867754, "learning_rate": 7.5195626027680315e-06, "loss": 0.1773, "step": 7421 }, { "epoch": 0.5924802426758202, "grad_norm": 0.3043637884545782, "learning_rate": 7.517058024174049e-06, "loss": 0.1502, "step": 7422 }, { "epoch": 0.5925600702482637, "grad_norm": 0.31444786037192574, "learning_rate": 7.514553611554909e-06, "loss": 0.1529, "step": 7423 }, { "epoch": 0.5926398978207073, "grad_norm": 0.25684536186538354, "learning_rate": 7.512049365078027e-06, "loss": 0.1856, "step": 7424 }, { "epoch": 0.5927197253931508, "grad_norm": 0.3108722735412593, "learning_rate": 7.509545284910801e-06, "loss": 0.1254, "step": 7425 }, { "epoch": 0.5927995529655943, "grad_norm": 0.27832955964510486, "learning_rate": 7.50704137122061e-06, "loss": 0.1894, "step": 7426 }, { "epoch": 0.5928793805380378, "grad_norm": 0.36384221033802117, "learning_rate": 7.504537624174843e-06, "loss": 0.1259, "step": 7427 }, { "epoch": 0.5929592081104813, "grad_norm": 0.29212146386174065, "learning_rate": 7.5020340439408565e-06, "loss": 0.1822, "step": 7428 }, { "epoch": 0.5930390356829249, "grad_norm": 0.28379870767160875, "learning_rate": 7.4995306306860114e-06, "loss": 0.1686, "step": 7429 }, { "epoch": 0.5931188632553684, "grad_norm": 0.28418507202876264, "learning_rate": 7.497027384577647e-06, "loss": 0.1688, "step": 7430 }, { "epoch": 0.593198690827812, "grad_norm": 0.3361963835455262, "learning_rate": 7.494524305783098e-06, "loss": 0.1551, "step": 7431 }, { "epoch": 0.5932785184002555, "grad_norm": 0.26404060605151636, "learning_rate": 7.4920213944696864e-06, "loss": 0.1417, "step": 7432 }, { "epoch": 0.593358345972699, "grad_norm": 0.26768986209308165, "learning_rate": 7.489518650804712e-06, "loss": 0.1625, "step": 7433 }, { "epoch": 0.5934381735451425, "grad_norm": 0.335472741351839, "learning_rate": 7.487016074955488e-06, "loss": 0.179, "step": 7434 }, { "epoch": 0.593518001117586, "grad_norm": 0.309679249032719, "learning_rate": 7.484513667089288e-06, "loss": 0.1618, "step": 7435 }, { "epoch": 0.5935978286900295, "grad_norm": 0.3359262933569916, "learning_rate": 7.4820114273734015e-06, "loss": 0.2058, "step": 7436 }, { "epoch": 0.593677656262473, "grad_norm": 0.334293457716183, "learning_rate": 7.479509355975078e-06, "loss": 0.1718, "step": 7437 }, { "epoch": 0.5937574838349166, "grad_norm": 0.28193307223527764, "learning_rate": 7.477007453061587e-06, "loss": 0.1785, "step": 7438 }, { "epoch": 0.5938373114073601, "grad_norm": 0.3252998351513182, "learning_rate": 7.474505718800162e-06, "loss": 0.1647, "step": 7439 }, { "epoch": 0.5939171389798036, "grad_norm": 0.27961560692244025, "learning_rate": 7.472004153358032e-06, "loss": 0.1638, "step": 7440 }, { "epoch": 0.5939969665522471, "grad_norm": 0.27918076932875263, "learning_rate": 7.469502756902423e-06, "loss": 0.1455, "step": 7441 }, { "epoch": 0.5940767941246907, "grad_norm": 0.29333606891826186, "learning_rate": 7.467001529600537e-06, "loss": 0.1775, "step": 7442 }, { "epoch": 0.5941566216971342, "grad_norm": 0.2385935168971776, "learning_rate": 7.464500471619578e-06, "loss": 0.1399, "step": 7443 }, { "epoch": 0.5942364492695777, "grad_norm": 0.3209527848792142, "learning_rate": 7.461999583126725e-06, "loss": 0.1713, "step": 7444 }, { "epoch": 0.5943162768420213, "grad_norm": 0.2948140971436628, "learning_rate": 7.459498864289159e-06, "loss": 0.1895, "step": 7445 }, { "epoch": 0.5943961044144648, "grad_norm": 0.29480355362262844, "learning_rate": 7.456998315274035e-06, "loss": 0.177, "step": 7446 }, { "epoch": 0.5944759319869083, "grad_norm": 0.28330533774301675, "learning_rate": 7.4544979362485174e-06, "loss": 0.185, "step": 7447 }, { "epoch": 0.5945557595593518, "grad_norm": 0.31723111550570515, "learning_rate": 7.4519977273797375e-06, "loss": 0.1891, "step": 7448 }, { "epoch": 0.5946355871317953, "grad_norm": 0.31850808128184555, "learning_rate": 7.449497688834823e-06, "loss": 0.1432, "step": 7449 }, { "epoch": 0.5947154147042388, "grad_norm": 0.28335473893592744, "learning_rate": 7.446997820780897e-06, "loss": 0.1829, "step": 7450 }, { "epoch": 0.5947952422766823, "grad_norm": 0.25994236734422627, "learning_rate": 7.44449812338506e-06, "loss": 0.1501, "step": 7451 }, { "epoch": 0.5948750698491259, "grad_norm": 0.295640854332491, "learning_rate": 7.4419985968144125e-06, "loss": 0.1745, "step": 7452 }, { "epoch": 0.5949548974215694, "grad_norm": 0.299537252567438, "learning_rate": 7.439499241236035e-06, "loss": 0.1825, "step": 7453 }, { "epoch": 0.5950347249940129, "grad_norm": 0.293093158459855, "learning_rate": 7.437000056817002e-06, "loss": 0.1548, "step": 7454 }, { "epoch": 0.5951145525664564, "grad_norm": 0.2859380223782643, "learning_rate": 7.434501043724372e-06, "loss": 0.1834, "step": 7455 }, { "epoch": 0.5951943801389, "grad_norm": 0.3133415684151522, "learning_rate": 7.432002202125193e-06, "loss": 0.1521, "step": 7456 }, { "epoch": 0.5952742077113435, "grad_norm": 0.31237291711205295, "learning_rate": 7.429503532186505e-06, "loss": 0.196, "step": 7457 }, { "epoch": 0.595354035283787, "grad_norm": 0.30409162051149735, "learning_rate": 7.427005034075332e-06, "loss": 0.1454, "step": 7458 }, { "epoch": 0.5954338628562306, "grad_norm": 0.35547086035910797, "learning_rate": 7.424506707958692e-06, "loss": 0.1516, "step": 7459 }, { "epoch": 0.5955136904286741, "grad_norm": 0.305487337117726, "learning_rate": 7.4220085540035835e-06, "loss": 0.1457, "step": 7460 }, { "epoch": 0.5955935180011176, "grad_norm": 0.27905267318609667, "learning_rate": 7.419510572377005e-06, "loss": 0.143, "step": 7461 }, { "epoch": 0.5956733455735611, "grad_norm": 0.3066590220850893, "learning_rate": 7.41701276324593e-06, "loss": 0.1971, "step": 7462 }, { "epoch": 0.5957531731460046, "grad_norm": 0.2736391332839001, "learning_rate": 7.41451512677733e-06, "loss": 0.1343, "step": 7463 }, { "epoch": 0.5958330007184481, "grad_norm": 0.3328331620932871, "learning_rate": 7.412017663138163e-06, "loss": 0.1534, "step": 7464 }, { "epoch": 0.5959128282908916, "grad_norm": 0.40523572402703933, "learning_rate": 7.409520372495371e-06, "loss": 0.1557, "step": 7465 }, { "epoch": 0.5959926558633352, "grad_norm": 0.34036819432503623, "learning_rate": 7.407023255015893e-06, "loss": 0.1455, "step": 7466 }, { "epoch": 0.5960724834357787, "grad_norm": 0.3132013562152301, "learning_rate": 7.404526310866647e-06, "loss": 0.1439, "step": 7467 }, { "epoch": 0.5961523110082222, "grad_norm": 0.3095871534834127, "learning_rate": 7.402029540214548e-06, "loss": 0.1532, "step": 7468 }, { "epoch": 0.5962321385806658, "grad_norm": 0.29701188113338195, "learning_rate": 7.399532943226495e-06, "loss": 0.197, "step": 7469 }, { "epoch": 0.5963119661531093, "grad_norm": 0.28421147167513533, "learning_rate": 7.397036520069367e-06, "loss": 0.1359, "step": 7470 }, { "epoch": 0.5963917937255528, "grad_norm": 0.30982034755730764, "learning_rate": 7.394540270910054e-06, "loss": 0.1836, "step": 7471 }, { "epoch": 0.5964716212979964, "grad_norm": 0.2698784432084684, "learning_rate": 7.392044195915405e-06, "loss": 0.1612, "step": 7472 }, { "epoch": 0.5965514488704399, "grad_norm": 0.27433052310274175, "learning_rate": 7.389548295252284e-06, "loss": 0.1411, "step": 7473 }, { "epoch": 0.5966312764428834, "grad_norm": 0.26679536030008366, "learning_rate": 7.387052569087529e-06, "loss": 0.1737, "step": 7474 }, { "epoch": 0.5967111040153269, "grad_norm": 0.2709366687433576, "learning_rate": 7.384557017587971e-06, "loss": 0.1765, "step": 7475 }, { "epoch": 0.5967909315877704, "grad_norm": 0.2989182554843264, "learning_rate": 7.382061640920428e-06, "loss": 0.1812, "step": 7476 }, { "epoch": 0.5968707591602139, "grad_norm": 0.2716723172012614, "learning_rate": 7.379566439251697e-06, "loss": 0.1822, "step": 7477 }, { "epoch": 0.5969505867326574, "grad_norm": 0.29521648219307184, "learning_rate": 7.377071412748587e-06, "loss": 0.1339, "step": 7478 }, { "epoch": 0.597030414305101, "grad_norm": 0.27477296910767823, "learning_rate": 7.374576561577864e-06, "loss": 0.1583, "step": 7479 }, { "epoch": 0.5971102418775445, "grad_norm": 0.32468662919945385, "learning_rate": 7.372081885906317e-06, "loss": 0.1339, "step": 7480 }, { "epoch": 0.597190069449988, "grad_norm": 0.28524756480899544, "learning_rate": 7.369587385900689e-06, "loss": 0.1751, "step": 7481 }, { "epoch": 0.5972698970224315, "grad_norm": 0.30088390362255873, "learning_rate": 7.367093061727741e-06, "loss": 0.1772, "step": 7482 }, { "epoch": 0.5973497245948751, "grad_norm": 0.3316490720720024, "learning_rate": 7.364598913554195e-06, "loss": 0.1711, "step": 7483 }, { "epoch": 0.5974295521673186, "grad_norm": 0.3227480701888123, "learning_rate": 7.3621049415467905e-06, "loss": 0.1702, "step": 7484 }, { "epoch": 0.5975093797397621, "grad_norm": 0.28198335674339536, "learning_rate": 7.359611145872228e-06, "loss": 0.1582, "step": 7485 }, { "epoch": 0.5975892073122057, "grad_norm": 0.27878068278735146, "learning_rate": 7.357117526697209e-06, "loss": 0.2175, "step": 7486 }, { "epoch": 0.5976690348846492, "grad_norm": 0.30364132774393976, "learning_rate": 7.354624084188426e-06, "loss": 0.2318, "step": 7487 }, { "epoch": 0.5977488624570927, "grad_norm": 0.3187303705863757, "learning_rate": 7.352130818512552e-06, "loss": 0.1544, "step": 7488 }, { "epoch": 0.5978286900295362, "grad_norm": 0.30856573284764677, "learning_rate": 7.3496377298362586e-06, "loss": 0.156, "step": 7489 }, { "epoch": 0.5979085176019797, "grad_norm": 0.2794333046761642, "learning_rate": 7.34714481832619e-06, "loss": 0.19, "step": 7490 }, { "epoch": 0.5979883451744232, "grad_norm": 0.29155266663675894, "learning_rate": 7.344652084148994e-06, "loss": 0.2173, "step": 7491 }, { "epoch": 0.5980681727468667, "grad_norm": 0.2787724640193272, "learning_rate": 7.3421595274712985e-06, "loss": 0.1626, "step": 7492 }, { "epoch": 0.5981480003193103, "grad_norm": 0.3065032407640425, "learning_rate": 7.339667148459718e-06, "loss": 0.1365, "step": 7493 }, { "epoch": 0.5982278278917538, "grad_norm": 0.2849971337042533, "learning_rate": 7.337174947280863e-06, "loss": 0.1817, "step": 7494 }, { "epoch": 0.5983076554641973, "grad_norm": 0.3043274904320019, "learning_rate": 7.334682924101322e-06, "loss": 0.1754, "step": 7495 }, { "epoch": 0.5983874830366409, "grad_norm": 0.29763068380232716, "learning_rate": 7.3321910790876825e-06, "loss": 0.1995, "step": 7496 }, { "epoch": 0.5984673106090844, "grad_norm": 0.32761097355145824, "learning_rate": 7.32969941240651e-06, "loss": 0.2195, "step": 7497 }, { "epoch": 0.5985471381815279, "grad_norm": 0.2887658590616813, "learning_rate": 7.327207924224366e-06, "loss": 0.1412, "step": 7498 }, { "epoch": 0.5986269657539715, "grad_norm": 0.2808002625860015, "learning_rate": 7.324716614707794e-06, "loss": 0.1398, "step": 7499 }, { "epoch": 0.598706793326415, "grad_norm": 0.2782629610557159, "learning_rate": 7.322225484023328e-06, "loss": 0.1716, "step": 7500 }, { "epoch": 0.5987866208988585, "grad_norm": 0.27350249207887584, "learning_rate": 7.3197345323374925e-06, "loss": 0.1766, "step": 7501 }, { "epoch": 0.598866448471302, "grad_norm": 0.2935272209040035, "learning_rate": 7.317243759816796e-06, "loss": 0.1993, "step": 7502 }, { "epoch": 0.5989462760437455, "grad_norm": 0.2894658974906702, "learning_rate": 7.314753166627738e-06, "loss": 0.1371, "step": 7503 }, { "epoch": 0.599026103616189, "grad_norm": 0.343521705590901, "learning_rate": 7.312262752936803e-06, "loss": 0.1693, "step": 7504 }, { "epoch": 0.5991059311886325, "grad_norm": 0.2381840227347724, "learning_rate": 7.309772518910468e-06, "loss": 0.1842, "step": 7505 }, { "epoch": 0.599185758761076, "grad_norm": 0.2547536457596895, "learning_rate": 7.307282464715193e-06, "loss": 0.2195, "step": 7506 }, { "epoch": 0.5992655863335196, "grad_norm": 0.2946092563113283, "learning_rate": 7.304792590517429e-06, "loss": 0.171, "step": 7507 }, { "epoch": 0.5993454139059631, "grad_norm": 0.2702570723840269, "learning_rate": 7.302302896483615e-06, "loss": 0.1613, "step": 7508 }, { "epoch": 0.5994252414784066, "grad_norm": 0.294809534722041, "learning_rate": 7.2998133827801745e-06, "loss": 0.1339, "step": 7509 }, { "epoch": 0.5995050690508502, "grad_norm": 0.28162196018927593, "learning_rate": 7.297324049573525e-06, "loss": 0.1523, "step": 7510 }, { "epoch": 0.5995848966232937, "grad_norm": 0.28577260240561686, "learning_rate": 7.294834897030065e-06, "loss": 0.2069, "step": 7511 }, { "epoch": 0.5996647241957372, "grad_norm": 0.2987819659157967, "learning_rate": 7.292345925316191e-06, "loss": 0.2154, "step": 7512 }, { "epoch": 0.5997445517681808, "grad_norm": 0.25242850208952, "learning_rate": 7.289857134598273e-06, "loss": 0.1744, "step": 7513 }, { "epoch": 0.5998243793406243, "grad_norm": 0.28287483487247744, "learning_rate": 7.287368525042682e-06, "loss": 0.1985, "step": 7514 }, { "epoch": 0.5999042069130678, "grad_norm": 0.3029162304558512, "learning_rate": 7.284880096815772e-06, "loss": 0.2081, "step": 7515 }, { "epoch": 0.5999840344855113, "grad_norm": 0.2613291312093242, "learning_rate": 7.282391850083877e-06, "loss": 0.1571, "step": 7516 }, { "epoch": 0.6000638620579548, "grad_norm": 0.3208084044335353, "learning_rate": 7.279903785013338e-06, "loss": 0.1642, "step": 7517 }, { "epoch": 0.6001436896303983, "grad_norm": 0.29748850531498144, "learning_rate": 7.277415901770461e-06, "loss": 0.1947, "step": 7518 }, { "epoch": 0.6002235172028418, "grad_norm": 0.29293657139855267, "learning_rate": 7.274928200521561e-06, "loss": 0.1571, "step": 7519 }, { "epoch": 0.6003033447752854, "grad_norm": 0.27926301348622784, "learning_rate": 7.272440681432923e-06, "loss": 0.1624, "step": 7520 }, { "epoch": 0.6003831723477289, "grad_norm": 0.28235505872030786, "learning_rate": 7.269953344670835e-06, "loss": 0.1628, "step": 7521 }, { "epoch": 0.6004629999201724, "grad_norm": 0.2775868146329433, "learning_rate": 7.267466190401562e-06, "loss": 0.1583, "step": 7522 }, { "epoch": 0.600542827492616, "grad_norm": 0.2801771258025794, "learning_rate": 7.264979218791355e-06, "loss": 0.2398, "step": 7523 }, { "epoch": 0.6006226550650595, "grad_norm": 0.29239439156639824, "learning_rate": 7.2624924300064715e-06, "loss": 0.1659, "step": 7524 }, { "epoch": 0.600702482637503, "grad_norm": 0.2672367214937839, "learning_rate": 7.260005824213128e-06, "loss": 0.1463, "step": 7525 }, { "epoch": 0.6007823102099465, "grad_norm": 0.24414952927025266, "learning_rate": 7.257519401577558e-06, "loss": 0.1236, "step": 7526 }, { "epoch": 0.6008621377823901, "grad_norm": 0.3097081072572302, "learning_rate": 7.2550331622659554e-06, "loss": 0.199, "step": 7527 }, { "epoch": 0.6009419653548336, "grad_norm": 0.37275413669173313, "learning_rate": 7.2525471064445305e-06, "loss": 0.1578, "step": 7528 }, { "epoch": 0.6010217929272771, "grad_norm": 0.32344304911792116, "learning_rate": 7.250061234279457e-06, "loss": 0.1755, "step": 7529 }, { "epoch": 0.6011016204997206, "grad_norm": 0.2571668823091528, "learning_rate": 7.247575545936904e-06, "loss": 0.1363, "step": 7530 }, { "epoch": 0.6011814480721641, "grad_norm": 0.27892232997430805, "learning_rate": 7.245090041583036e-06, "loss": 0.135, "step": 7531 }, { "epoch": 0.6012612756446076, "grad_norm": 0.39542455362253015, "learning_rate": 7.242604721383994e-06, "loss": 0.1403, "step": 7532 }, { "epoch": 0.6013411032170511, "grad_norm": 0.2824548667030938, "learning_rate": 7.240119585505918e-06, "loss": 0.117, "step": 7533 }, { "epoch": 0.6014209307894947, "grad_norm": 0.26611381617797736, "learning_rate": 7.237634634114922e-06, "loss": 0.143, "step": 7534 }, { "epoch": 0.6015007583619382, "grad_norm": 0.2588590181076389, "learning_rate": 7.23514986737712e-06, "loss": 0.1582, "step": 7535 }, { "epoch": 0.6015805859343817, "grad_norm": 0.3742194427081187, "learning_rate": 7.23266528545861e-06, "loss": 0.1389, "step": 7536 }, { "epoch": 0.6016604135068253, "grad_norm": 0.3217045226083837, "learning_rate": 7.230180888525471e-06, "loss": 0.1996, "step": 7537 }, { "epoch": 0.6017402410792688, "grad_norm": 0.32949012309922593, "learning_rate": 7.227696676743781e-06, "loss": 0.2244, "step": 7538 }, { "epoch": 0.6018200686517123, "grad_norm": 0.3297896175605759, "learning_rate": 7.2252126502795935e-06, "loss": 0.1927, "step": 7539 }, { "epoch": 0.6018998962241559, "grad_norm": 0.3012666083499725, "learning_rate": 7.222728809298962e-06, "loss": 0.2073, "step": 7540 }, { "epoch": 0.6019797237965994, "grad_norm": 0.2894613409382, "learning_rate": 7.220245153967916e-06, "loss": 0.1188, "step": 7541 }, { "epoch": 0.6020595513690429, "grad_norm": 0.3624297938614785, "learning_rate": 7.217761684452484e-06, "loss": 0.1684, "step": 7542 }, { "epoch": 0.6021393789414864, "grad_norm": 0.31190952909214437, "learning_rate": 7.215278400918673e-06, "loss": 0.1679, "step": 7543 }, { "epoch": 0.6022192065139299, "grad_norm": 0.3589430338533595, "learning_rate": 7.212795303532478e-06, "loss": 0.1539, "step": 7544 }, { "epoch": 0.6022990340863734, "grad_norm": 0.2880790397931453, "learning_rate": 7.210312392459888e-06, "loss": 0.1486, "step": 7545 }, { "epoch": 0.6023788616588169, "grad_norm": 0.31155917230188546, "learning_rate": 7.207829667866875e-06, "loss": 0.1519, "step": 7546 }, { "epoch": 0.6024586892312604, "grad_norm": 0.2586392563361673, "learning_rate": 7.205347129919398e-06, "loss": 0.1749, "step": 7547 }, { "epoch": 0.602538516803704, "grad_norm": 0.30905488029107997, "learning_rate": 7.202864778783405e-06, "loss": 0.1986, "step": 7548 }, { "epoch": 0.6026183443761475, "grad_norm": 0.3356100141276884, "learning_rate": 7.2003826146248346e-06, "loss": 0.1565, "step": 7549 }, { "epoch": 0.602698171948591, "grad_norm": 0.3124580292563062, "learning_rate": 7.197900637609605e-06, "loss": 0.1723, "step": 7550 }, { "epoch": 0.6027779995210346, "grad_norm": 0.3825800346348944, "learning_rate": 7.19541884790363e-06, "loss": 0.1608, "step": 7551 }, { "epoch": 0.6028578270934781, "grad_norm": 0.2799989911328842, "learning_rate": 7.192937245672807e-06, "loss": 0.1542, "step": 7552 }, { "epoch": 0.6029376546659216, "grad_norm": 0.26582657678646915, "learning_rate": 7.190455831083019e-06, "loss": 0.181, "step": 7553 }, { "epoch": 0.6030174822383652, "grad_norm": 0.28962861688667063, "learning_rate": 7.1879746043001404e-06, "loss": 0.1832, "step": 7554 }, { "epoch": 0.6030973098108087, "grad_norm": 0.2612340054002167, "learning_rate": 7.185493565490029e-06, "loss": 0.2003, "step": 7555 }, { "epoch": 0.6031771373832522, "grad_norm": 0.31569260616773426, "learning_rate": 7.183012714818538e-06, "loss": 0.1745, "step": 7556 }, { "epoch": 0.6032569649556957, "grad_norm": 0.24288449155350184, "learning_rate": 7.1805320524514964e-06, "loss": 0.1889, "step": 7557 }, { "epoch": 0.6033367925281392, "grad_norm": 0.24907598369531508, "learning_rate": 7.178051578554732e-06, "loss": 0.1822, "step": 7558 }, { "epoch": 0.6034166201005827, "grad_norm": 0.26897307714222723, "learning_rate": 7.175571293294052e-06, "loss": 0.1665, "step": 7559 }, { "epoch": 0.6034964476730262, "grad_norm": 0.2816456738672543, "learning_rate": 7.173091196835249e-06, "loss": 0.1589, "step": 7560 }, { "epoch": 0.6035762752454698, "grad_norm": 0.24439891496290495, "learning_rate": 7.170611289344118e-06, "loss": 0.1892, "step": 7561 }, { "epoch": 0.6036561028179133, "grad_norm": 0.3464018202195111, "learning_rate": 7.16813157098642e-06, "loss": 0.1488, "step": 7562 }, { "epoch": 0.6037359303903568, "grad_norm": 0.3013283732064381, "learning_rate": 7.1656520419279244e-06, "loss": 0.1952, "step": 7563 }, { "epoch": 0.6038157579628004, "grad_norm": 0.3574307879674708, "learning_rate": 7.163172702334368e-06, "loss": 0.1493, "step": 7564 }, { "epoch": 0.6038955855352439, "grad_norm": 0.39780064823533234, "learning_rate": 7.160693552371494e-06, "loss": 0.1401, "step": 7565 }, { "epoch": 0.6039754131076874, "grad_norm": 0.30537736180382075, "learning_rate": 7.158214592205021e-06, "loss": 0.1288, "step": 7566 }, { "epoch": 0.604055240680131, "grad_norm": 0.2729196233501926, "learning_rate": 7.155735822000649e-06, "loss": 0.2078, "step": 7567 }, { "epoch": 0.6041350682525745, "grad_norm": 0.2481513813996084, "learning_rate": 7.15325724192409e-06, "loss": 0.1558, "step": 7568 }, { "epoch": 0.604214895825018, "grad_norm": 0.25736599258845155, "learning_rate": 7.150778852141012e-06, "loss": 0.1577, "step": 7569 }, { "epoch": 0.6042947233974615, "grad_norm": 0.29777558616662997, "learning_rate": 7.148300652817097e-06, "loss": 0.1583, "step": 7570 }, { "epoch": 0.604374550969905, "grad_norm": 0.29892474639937877, "learning_rate": 7.145822644117992e-06, "loss": 0.2067, "step": 7571 }, { "epoch": 0.6044543785423485, "grad_norm": 0.337697873265167, "learning_rate": 7.143344826209355e-06, "loss": 0.1288, "step": 7572 }, { "epoch": 0.604534206114792, "grad_norm": 0.25497423740882824, "learning_rate": 7.140867199256809e-06, "loss": 0.1477, "step": 7573 }, { "epoch": 0.6046140336872355, "grad_norm": 0.31809307900436073, "learning_rate": 7.138389763425975e-06, "loss": 0.1523, "step": 7574 }, { "epoch": 0.604693861259679, "grad_norm": 0.2714158820013181, "learning_rate": 7.1359125188824606e-06, "loss": 0.1401, "step": 7575 }, { "epoch": 0.6047736888321226, "grad_norm": 0.31916327857522864, "learning_rate": 7.1334354657918585e-06, "loss": 0.2176, "step": 7576 }, { "epoch": 0.6048535164045661, "grad_norm": 0.3048850596698461, "learning_rate": 7.130958604319754e-06, "loss": 0.133, "step": 7577 }, { "epoch": 0.6049333439770097, "grad_norm": 0.2537929233766569, "learning_rate": 7.12848193463171e-06, "loss": 0.1736, "step": 7578 }, { "epoch": 0.6050131715494532, "grad_norm": 0.3488362214122531, "learning_rate": 7.126005456893288e-06, "loss": 0.1617, "step": 7579 }, { "epoch": 0.6050929991218967, "grad_norm": 0.3069692510144947, "learning_rate": 7.123529171270025e-06, "loss": 0.1733, "step": 7580 }, { "epoch": 0.6051728266943402, "grad_norm": 0.30643250134213335, "learning_rate": 7.121053077927456e-06, "loss": 0.1556, "step": 7581 }, { "epoch": 0.6052526542667838, "grad_norm": 0.2577143316835395, "learning_rate": 7.118577177031094e-06, "loss": 0.132, "step": 7582 }, { "epoch": 0.6053324818392273, "grad_norm": 0.25018152764959906, "learning_rate": 7.1161014687464446e-06, "loss": 0.1549, "step": 7583 }, { "epoch": 0.6054123094116708, "grad_norm": 0.3089982981535362, "learning_rate": 7.113625953239001e-06, "loss": 0.1179, "step": 7584 }, { "epoch": 0.6054921369841143, "grad_norm": 0.3081556723589348, "learning_rate": 7.111150630674237e-06, "loss": 0.1589, "step": 7585 }, { "epoch": 0.6055719645565578, "grad_norm": 0.2985990409784195, "learning_rate": 7.108675501217623e-06, "loss": 0.1438, "step": 7586 }, { "epoch": 0.6056517921290013, "grad_norm": 0.3457715890537656, "learning_rate": 7.1062005650346065e-06, "loss": 0.1703, "step": 7587 }, { "epoch": 0.6057316197014448, "grad_norm": 0.28428558659959857, "learning_rate": 7.103725822290635e-06, "loss": 0.1772, "step": 7588 }, { "epoch": 0.6058114472738884, "grad_norm": 0.2694997744092249, "learning_rate": 7.101251273151128e-06, "loss": 0.1676, "step": 7589 }, { "epoch": 0.6058912748463319, "grad_norm": 0.32681242235112057, "learning_rate": 7.0987769177815e-06, "loss": 0.1625, "step": 7590 }, { "epoch": 0.6059711024187755, "grad_norm": 0.30035533685234495, "learning_rate": 7.096302756347158e-06, "loss": 0.1965, "step": 7591 }, { "epoch": 0.606050929991219, "grad_norm": 0.2817977796157294, "learning_rate": 7.093828789013481e-06, "loss": 0.2147, "step": 7592 }, { "epoch": 0.6061307575636625, "grad_norm": 0.32977891636862106, "learning_rate": 7.0913550159458524e-06, "loss": 0.1911, "step": 7593 }, { "epoch": 0.606210585136106, "grad_norm": 0.274897717193195, "learning_rate": 7.088881437309627e-06, "loss": 0.159, "step": 7594 }, { "epoch": 0.6062904127085496, "grad_norm": 0.3291365109017703, "learning_rate": 7.086408053270159e-06, "loss": 0.2611, "step": 7595 }, { "epoch": 0.6063702402809931, "grad_norm": 0.29897478026670943, "learning_rate": 7.083934863992783e-06, "loss": 0.1287, "step": 7596 }, { "epoch": 0.6064500678534366, "grad_norm": 0.29720903575242386, "learning_rate": 7.081461869642819e-06, "loss": 0.1932, "step": 7597 }, { "epoch": 0.6065298954258801, "grad_norm": 0.26138437837944273, "learning_rate": 7.078989070385581e-06, "loss": 0.1524, "step": 7598 }, { "epoch": 0.6066097229983236, "grad_norm": 0.3176045344500351, "learning_rate": 7.0765164663863615e-06, "loss": 0.1425, "step": 7599 }, { "epoch": 0.6066895505707671, "grad_norm": 0.3173952763792955, "learning_rate": 7.07404405781045e-06, "loss": 0.1957, "step": 7600 }, { "epoch": 0.6067693781432106, "grad_norm": 0.2878808143262356, "learning_rate": 7.071571844823111e-06, "loss": 0.1849, "step": 7601 }, { "epoch": 0.6068492057156541, "grad_norm": 0.26325536764450413, "learning_rate": 7.069099827589607e-06, "loss": 0.141, "step": 7602 }, { "epoch": 0.6069290332880977, "grad_norm": 0.2914479455665014, "learning_rate": 7.0666280062751845e-06, "loss": 0.1547, "step": 7603 }, { "epoch": 0.6070088608605412, "grad_norm": 0.30186793205285445, "learning_rate": 7.064156381045063e-06, "loss": 0.1399, "step": 7604 }, { "epoch": 0.6070886884329848, "grad_norm": 0.28216351255119526, "learning_rate": 7.061684952064476e-06, "loss": 0.1661, "step": 7605 }, { "epoch": 0.6071685160054283, "grad_norm": 0.27226680959761745, "learning_rate": 7.059213719498616e-06, "loss": 0.142, "step": 7606 }, { "epoch": 0.6072483435778718, "grad_norm": 0.28629095718557257, "learning_rate": 7.056742683512686e-06, "loss": 0.1503, "step": 7607 }, { "epoch": 0.6073281711503153, "grad_norm": 0.26481850520293093, "learning_rate": 7.054271844271854e-06, "loss": 0.1532, "step": 7608 }, { "epoch": 0.6074079987227589, "grad_norm": 0.2837991017512583, "learning_rate": 7.0518012019413e-06, "loss": 0.1202, "step": 7609 }, { "epoch": 0.6074878262952024, "grad_norm": 0.31550431124618594, "learning_rate": 7.049330756686163e-06, "loss": 0.1952, "step": 7610 }, { "epoch": 0.6075676538676459, "grad_norm": 0.3155507991738878, "learning_rate": 7.046860508671586e-06, "loss": 0.1662, "step": 7611 }, { "epoch": 0.6076474814400894, "grad_norm": 0.26760131566876044, "learning_rate": 7.044390458062702e-06, "loss": 0.1348, "step": 7612 }, { "epoch": 0.6077273090125329, "grad_norm": 0.375172746980959, "learning_rate": 7.041920605024614e-06, "loss": 0.1676, "step": 7613 }, { "epoch": 0.6078071365849764, "grad_norm": 0.338985146387353, "learning_rate": 7.0394509497224335e-06, "loss": 0.1664, "step": 7614 }, { "epoch": 0.6078869641574199, "grad_norm": 0.3290107908475239, "learning_rate": 7.0369814923212355e-06, "loss": 0.1499, "step": 7615 }, { "epoch": 0.6079667917298635, "grad_norm": 0.2555884429156734, "learning_rate": 7.0345122329861035e-06, "loss": 0.1535, "step": 7616 }, { "epoch": 0.608046619302307, "grad_norm": 0.3358182020666776, "learning_rate": 7.032043171882087e-06, "loss": 0.1571, "step": 7617 }, { "epoch": 0.6081264468747506, "grad_norm": 0.30170486369555927, "learning_rate": 7.029574309174245e-06, "loss": 0.1361, "step": 7618 }, { "epoch": 0.6082062744471941, "grad_norm": 0.2744153304318644, "learning_rate": 7.027105645027603e-06, "loss": 0.1417, "step": 7619 }, { "epoch": 0.6082861020196376, "grad_norm": 0.29686718842524407, "learning_rate": 7.024637179607184e-06, "loss": 0.1754, "step": 7620 }, { "epoch": 0.6083659295920811, "grad_norm": 0.3107238850816021, "learning_rate": 7.022168913077994e-06, "loss": 0.171, "step": 7621 }, { "epoch": 0.6084457571645246, "grad_norm": 0.27431553401197023, "learning_rate": 7.019700845605027e-06, "loss": 0.1581, "step": 7622 }, { "epoch": 0.6085255847369682, "grad_norm": 0.2703403185992444, "learning_rate": 7.017232977353265e-06, "loss": 0.2144, "step": 7623 }, { "epoch": 0.6086054123094117, "grad_norm": 0.3286361249964389, "learning_rate": 7.014765308487673e-06, "loss": 0.1812, "step": 7624 }, { "epoch": 0.6086852398818552, "grad_norm": 0.2931753430264021, "learning_rate": 7.012297839173209e-06, "loss": 0.1461, "step": 7625 }, { "epoch": 0.6087650674542987, "grad_norm": 0.30679148677810164, "learning_rate": 7.0098305695748106e-06, "loss": 0.1989, "step": 7626 }, { "epoch": 0.6088448950267422, "grad_norm": 0.289830057296651, "learning_rate": 7.007363499857402e-06, "loss": 0.1446, "step": 7627 }, { "epoch": 0.6089247225991857, "grad_norm": 0.29931533395781795, "learning_rate": 7.0048966301859046e-06, "loss": 0.1618, "step": 7628 }, { "epoch": 0.6090045501716292, "grad_norm": 0.27025870674531, "learning_rate": 7.00242996072521e-06, "loss": 0.1816, "step": 7629 }, { "epoch": 0.6090843777440728, "grad_norm": 0.3197283164487347, "learning_rate": 6.9999634916402135e-06, "loss": 0.1274, "step": 7630 }, { "epoch": 0.6091642053165163, "grad_norm": 0.2713655238773183, "learning_rate": 6.997497223095783e-06, "loss": 0.2125, "step": 7631 }, { "epoch": 0.6092440328889599, "grad_norm": 0.3319823091883686, "learning_rate": 6.995031155256783e-06, "loss": 0.1545, "step": 7632 }, { "epoch": 0.6093238604614034, "grad_norm": 0.31481249420590846, "learning_rate": 6.992565288288058e-06, "loss": 0.1904, "step": 7633 }, { "epoch": 0.6094036880338469, "grad_norm": 0.29790299986476143, "learning_rate": 6.99009962235444e-06, "loss": 0.1378, "step": 7634 }, { "epoch": 0.6094835156062904, "grad_norm": 0.29107692914978583, "learning_rate": 6.987634157620753e-06, "loss": 0.1603, "step": 7635 }, { "epoch": 0.609563343178734, "grad_norm": 0.3020819565618063, "learning_rate": 6.985168894251798e-06, "loss": 0.1503, "step": 7636 }, { "epoch": 0.6096431707511775, "grad_norm": 0.26792717227977497, "learning_rate": 6.982703832412374e-06, "loss": 0.1845, "step": 7637 }, { "epoch": 0.609722998323621, "grad_norm": 0.2794273431806938, "learning_rate": 6.980238972267257e-06, "loss": 0.1566, "step": 7638 }, { "epoch": 0.6098028258960645, "grad_norm": 0.30258925151201366, "learning_rate": 6.977774313981216e-06, "loss": 0.1248, "step": 7639 }, { "epoch": 0.609882653468508, "grad_norm": 0.298656866724654, "learning_rate": 6.975309857719002e-06, "loss": 0.1965, "step": 7640 }, { "epoch": 0.6099624810409515, "grad_norm": 0.25379097738579787, "learning_rate": 6.972845603645352e-06, "loss": 0.1898, "step": 7641 }, { "epoch": 0.610042308613395, "grad_norm": 0.242590059707312, "learning_rate": 6.9703815519249954e-06, "loss": 0.1266, "step": 7642 }, { "epoch": 0.6101221361858385, "grad_norm": 0.2540856365192093, "learning_rate": 6.96791770272264e-06, "loss": 0.1826, "step": 7643 }, { "epoch": 0.6102019637582821, "grad_norm": 0.34421541280353857, "learning_rate": 6.96545405620299e-06, "loss": 0.1631, "step": 7644 }, { "epoch": 0.6102817913307257, "grad_norm": 0.2803604920690738, "learning_rate": 6.962990612530725e-06, "loss": 0.1701, "step": 7645 }, { "epoch": 0.6103616189031692, "grad_norm": 0.2937785561150121, "learning_rate": 6.96052737187052e-06, "loss": 0.1543, "step": 7646 }, { "epoch": 0.6104414464756127, "grad_norm": 0.30400090180000944, "learning_rate": 6.958064334387031e-06, "loss": 0.1552, "step": 7647 }, { "epoch": 0.6105212740480562, "grad_norm": 0.28899593716375266, "learning_rate": 6.955601500244904e-06, "loss": 0.1748, "step": 7648 }, { "epoch": 0.6106011016204997, "grad_norm": 0.27801094096346385, "learning_rate": 6.953138869608771e-06, "loss": 0.1135, "step": 7649 }, { "epoch": 0.6106809291929433, "grad_norm": 0.28829045476350146, "learning_rate": 6.950676442643242e-06, "loss": 0.1365, "step": 7650 }, { "epoch": 0.6107607567653868, "grad_norm": 0.29610392660878254, "learning_rate": 6.94821421951293e-06, "loss": 0.1568, "step": 7651 }, { "epoch": 0.6108405843378303, "grad_norm": 0.2629541500252418, "learning_rate": 6.945752200382414e-06, "loss": 0.1556, "step": 7652 }, { "epoch": 0.6109204119102738, "grad_norm": 0.2679462273550872, "learning_rate": 6.943290385416284e-06, "loss": 0.2017, "step": 7653 }, { "epoch": 0.6110002394827173, "grad_norm": 0.28762805974506755, "learning_rate": 6.940828774779087e-06, "loss": 0.1073, "step": 7654 }, { "epoch": 0.6110800670551608, "grad_norm": 0.26839924960181544, "learning_rate": 6.938367368635388e-06, "loss": 0.1697, "step": 7655 }, { "epoch": 0.6111598946276043, "grad_norm": 0.2759473667913425, "learning_rate": 6.9359061671497105e-06, "loss": 0.2354, "step": 7656 }, { "epoch": 0.6112397222000479, "grad_norm": 0.26752944317975946, "learning_rate": 6.933445170486577e-06, "loss": 0.1983, "step": 7657 }, { "epoch": 0.6113195497724914, "grad_norm": 0.27310419102633976, "learning_rate": 6.930984378810504e-06, "loss": 0.1579, "step": 7658 }, { "epoch": 0.611399377344935, "grad_norm": 0.2789821962143291, "learning_rate": 6.928523792285974e-06, "loss": 0.1675, "step": 7659 }, { "epoch": 0.6114792049173785, "grad_norm": 0.3270638712352993, "learning_rate": 6.926063411077479e-06, "loss": 0.1511, "step": 7660 }, { "epoch": 0.611559032489822, "grad_norm": 0.28335704159082475, "learning_rate": 6.923603235349474e-06, "loss": 0.1711, "step": 7661 }, { "epoch": 0.6116388600622655, "grad_norm": 0.2903521547243148, "learning_rate": 6.921143265266424e-06, "loss": 0.157, "step": 7662 }, { "epoch": 0.611718687634709, "grad_norm": 0.3500810731493733, "learning_rate": 6.918683500992761e-06, "loss": 0.176, "step": 7663 }, { "epoch": 0.6117985152071526, "grad_norm": 0.28720831816880776, "learning_rate": 6.91622394269291e-06, "loss": 0.1995, "step": 7664 }, { "epoch": 0.6118783427795961, "grad_norm": 0.24314684140944076, "learning_rate": 6.9137645905312865e-06, "loss": 0.2164, "step": 7665 }, { "epoch": 0.6119581703520396, "grad_norm": 0.35906819445780014, "learning_rate": 6.911305444672284e-06, "loss": 0.1738, "step": 7666 }, { "epoch": 0.6120379979244831, "grad_norm": 0.26873319926999595, "learning_rate": 6.908846505280291e-06, "loss": 0.1879, "step": 7667 }, { "epoch": 0.6121178254969266, "grad_norm": 0.3334673137355594, "learning_rate": 6.906387772519675e-06, "loss": 0.1497, "step": 7668 }, { "epoch": 0.6121976530693701, "grad_norm": 0.319516297564685, "learning_rate": 6.903929246554794e-06, "loss": 0.1614, "step": 7669 }, { "epoch": 0.6122774806418136, "grad_norm": 0.29223894204313744, "learning_rate": 6.901470927549994e-06, "loss": 0.232, "step": 7670 }, { "epoch": 0.6123573082142572, "grad_norm": 0.29206122311709853, "learning_rate": 6.899012815669595e-06, "loss": 0.1498, "step": 7671 }, { "epoch": 0.6124371357867008, "grad_norm": 0.27243455468541916, "learning_rate": 6.896554911077921e-06, "loss": 0.1328, "step": 7672 }, { "epoch": 0.6125169633591443, "grad_norm": 0.3294697486663042, "learning_rate": 6.894097213939266e-06, "loss": 0.1957, "step": 7673 }, { "epoch": 0.6125967909315878, "grad_norm": 0.3035075361114621, "learning_rate": 6.891639724417924e-06, "loss": 0.166, "step": 7674 }, { "epoch": 0.6126766185040313, "grad_norm": 0.2791967517471857, "learning_rate": 6.889182442678164e-06, "loss": 0.197, "step": 7675 }, { "epoch": 0.6127564460764748, "grad_norm": 0.29978170602624454, "learning_rate": 6.886725368884248e-06, "loss": 0.1612, "step": 7676 }, { "epoch": 0.6128362736489184, "grad_norm": 0.30618188583282013, "learning_rate": 6.884268503200417e-06, "loss": 0.1722, "step": 7677 }, { "epoch": 0.6129161012213619, "grad_norm": 0.32262261441432394, "learning_rate": 6.8818118457909115e-06, "loss": 0.1612, "step": 7678 }, { "epoch": 0.6129959287938054, "grad_norm": 0.2456801740979109, "learning_rate": 6.8793553968199424e-06, "loss": 0.21, "step": 7679 }, { "epoch": 0.6130757563662489, "grad_norm": 0.37458781656516776, "learning_rate": 6.876899156451713e-06, "loss": 0.125, "step": 7680 }, { "epoch": 0.6131555839386924, "grad_norm": 0.3291392846741735, "learning_rate": 6.8744431248504185e-06, "loss": 0.164, "step": 7681 }, { "epoch": 0.6132354115111359, "grad_norm": 0.3143153501222371, "learning_rate": 6.8719873021802295e-06, "loss": 0.1631, "step": 7682 }, { "epoch": 0.6133152390835794, "grad_norm": 0.2524999873849358, "learning_rate": 6.869531688605312e-06, "loss": 0.1287, "step": 7683 }, { "epoch": 0.613395066656023, "grad_norm": 0.24878963599153084, "learning_rate": 6.867076284289811e-06, "loss": 0.1891, "step": 7684 }, { "epoch": 0.6134748942284665, "grad_norm": 0.2761120011820755, "learning_rate": 6.864621089397865e-06, "loss": 0.1995, "step": 7685 }, { "epoch": 0.6135547218009101, "grad_norm": 0.2979591770917232, "learning_rate": 6.8621661040935895e-06, "loss": 0.166, "step": 7686 }, { "epoch": 0.6136345493733536, "grad_norm": 0.25477205869770325, "learning_rate": 6.85971132854109e-06, "loss": 0.1261, "step": 7687 }, { "epoch": 0.6137143769457971, "grad_norm": 0.32261934844244866, "learning_rate": 6.857256762904465e-06, "loss": 0.2249, "step": 7688 }, { "epoch": 0.6137942045182406, "grad_norm": 0.2740386020465224, "learning_rate": 6.854802407347785e-06, "loss": 0.2176, "step": 7689 }, { "epoch": 0.6138740320906841, "grad_norm": 0.3170115258345458, "learning_rate": 6.8523482620351175e-06, "loss": 0.161, "step": 7690 }, { "epoch": 0.6139538596631277, "grad_norm": 0.2779884600259277, "learning_rate": 6.849894327130513e-06, "loss": 0.1899, "step": 7691 }, { "epoch": 0.6140336872355712, "grad_norm": 0.2557985006919795, "learning_rate": 6.847440602798007e-06, "loss": 0.1859, "step": 7692 }, { "epoch": 0.6141135148080147, "grad_norm": 0.32452565442364656, "learning_rate": 6.844987089201622e-06, "loss": 0.1453, "step": 7693 }, { "epoch": 0.6141933423804582, "grad_norm": 0.2900904616185844, "learning_rate": 6.84253378650536e-06, "loss": 0.1766, "step": 7694 }, { "epoch": 0.6142731699529017, "grad_norm": 0.28177789285236854, "learning_rate": 6.840080694873224e-06, "loss": 0.2052, "step": 7695 }, { "epoch": 0.6143529975253452, "grad_norm": 0.2496337422792737, "learning_rate": 6.837627814469182e-06, "loss": 0.1885, "step": 7696 }, { "epoch": 0.6144328250977887, "grad_norm": 0.2965948449081479, "learning_rate": 6.835175145457213e-06, "loss": 0.1885, "step": 7697 }, { "epoch": 0.6145126526702323, "grad_norm": 0.31002952629416863, "learning_rate": 6.832722688001254e-06, "loss": 0.1713, "step": 7698 }, { "epoch": 0.6145924802426758, "grad_norm": 0.4144894433852231, "learning_rate": 6.830270442265256e-06, "loss": 0.1658, "step": 7699 }, { "epoch": 0.6146723078151194, "grad_norm": 0.29046835909922997, "learning_rate": 6.827818408413132e-06, "loss": 0.169, "step": 7700 }, { "epoch": 0.6147521353875629, "grad_norm": 0.27646707105490204, "learning_rate": 6.825366586608792e-06, "loss": 0.1928, "step": 7701 }, { "epoch": 0.6148319629600064, "grad_norm": 0.29209961524289874, "learning_rate": 6.822914977016134e-06, "loss": 0.1431, "step": 7702 }, { "epoch": 0.6149117905324499, "grad_norm": 0.3052870299934648, "learning_rate": 6.820463579799033e-06, "loss": 0.1364, "step": 7703 }, { "epoch": 0.6149916181048934, "grad_norm": 0.30092162036426284, "learning_rate": 6.818012395121365e-06, "loss": 0.1494, "step": 7704 }, { "epoch": 0.615071445677337, "grad_norm": 0.26637901289240684, "learning_rate": 6.81556142314697e-06, "loss": 0.1579, "step": 7705 }, { "epoch": 0.6151512732497805, "grad_norm": 0.2515586667742467, "learning_rate": 6.813110664039697e-06, "loss": 0.1637, "step": 7706 }, { "epoch": 0.615231100822224, "grad_norm": 0.25653165179863285, "learning_rate": 6.8106601179633635e-06, "loss": 0.1987, "step": 7707 }, { "epoch": 0.6153109283946675, "grad_norm": 0.3076731654885414, "learning_rate": 6.8082097850817765e-06, "loss": 0.1941, "step": 7708 }, { "epoch": 0.615390755967111, "grad_norm": 0.289643829335464, "learning_rate": 6.8057596655587375e-06, "loss": 0.1513, "step": 7709 }, { "epoch": 0.6154705835395545, "grad_norm": 0.40949429763016326, "learning_rate": 6.803309759558021e-06, "loss": 0.1443, "step": 7710 }, { "epoch": 0.615550411111998, "grad_norm": 0.33902942907695427, "learning_rate": 6.800860067243398e-06, "loss": 0.226, "step": 7711 }, { "epoch": 0.6156302386844416, "grad_norm": 0.26433145683107123, "learning_rate": 6.798410588778617e-06, "loss": 0.1365, "step": 7712 }, { "epoch": 0.6157100662568852, "grad_norm": 0.26337628034345234, "learning_rate": 6.79596132432742e-06, "loss": 0.1457, "step": 7713 }, { "epoch": 0.6157898938293287, "grad_norm": 0.26724855042166007, "learning_rate": 6.793512274053529e-06, "loss": 0.1506, "step": 7714 }, { "epoch": 0.6158697214017722, "grad_norm": 0.2553976658771319, "learning_rate": 6.791063438120653e-06, "loss": 0.1438, "step": 7715 }, { "epoch": 0.6159495489742157, "grad_norm": 0.25244286873334004, "learning_rate": 6.788614816692487e-06, "loss": 0.1716, "step": 7716 }, { "epoch": 0.6160293765466592, "grad_norm": 0.2959559478191478, "learning_rate": 6.786166409932711e-06, "loss": 0.1876, "step": 7717 }, { "epoch": 0.6161092041191027, "grad_norm": 0.28243797968678885, "learning_rate": 6.783718218004992e-06, "loss": 0.2014, "step": 7718 }, { "epoch": 0.6161890316915463, "grad_norm": 0.3013811483964268, "learning_rate": 6.781270241072982e-06, "loss": 0.1629, "step": 7719 }, { "epoch": 0.6162688592639898, "grad_norm": 0.3113126237119909, "learning_rate": 6.778822479300319e-06, "loss": 0.1429, "step": 7720 }, { "epoch": 0.6163486868364333, "grad_norm": 0.2907882634863935, "learning_rate": 6.7763749328506245e-06, "loss": 0.1726, "step": 7721 }, { "epoch": 0.6164285144088768, "grad_norm": 0.29052308332564597, "learning_rate": 6.77392760188751e-06, "loss": 0.1492, "step": 7722 }, { "epoch": 0.6165083419813203, "grad_norm": 0.2882430560250524, "learning_rate": 6.771480486574569e-06, "loss": 0.184, "step": 7723 }, { "epoch": 0.6165881695537638, "grad_norm": 0.2598080361295156, "learning_rate": 6.769033587075377e-06, "loss": 0.1626, "step": 7724 }, { "epoch": 0.6166679971262073, "grad_norm": 0.31865811235859265, "learning_rate": 6.766586903553508e-06, "loss": 0.1713, "step": 7725 }, { "epoch": 0.6167478246986509, "grad_norm": 0.24003823656730436, "learning_rate": 6.764140436172506e-06, "loss": 0.1485, "step": 7726 }, { "epoch": 0.6168276522710945, "grad_norm": 0.3205937703195922, "learning_rate": 6.761694185095911e-06, "loss": 0.166, "step": 7727 }, { "epoch": 0.616907479843538, "grad_norm": 0.26622033211343665, "learning_rate": 6.759248150487243e-06, "loss": 0.1778, "step": 7728 }, { "epoch": 0.6169873074159815, "grad_norm": 0.27826930709662023, "learning_rate": 6.756802332510013e-06, "loss": 0.1266, "step": 7729 }, { "epoch": 0.617067134988425, "grad_norm": 0.30061060684058516, "learning_rate": 6.7543567313277144e-06, "loss": 0.2647, "step": 7730 }, { "epoch": 0.6171469625608685, "grad_norm": 0.2984311912669078, "learning_rate": 6.7519113471038186e-06, "loss": 0.1228, "step": 7731 }, { "epoch": 0.617226790133312, "grad_norm": 0.36855753871727565, "learning_rate": 6.749466180001798e-06, "loss": 0.1501, "step": 7732 }, { "epoch": 0.6173066177057556, "grad_norm": 0.31678165242720435, "learning_rate": 6.747021230185098e-06, "loss": 0.1553, "step": 7733 }, { "epoch": 0.6173864452781991, "grad_norm": 0.29312713114611316, "learning_rate": 6.744576497817158e-06, "loss": 0.2079, "step": 7734 }, { "epoch": 0.6174662728506426, "grad_norm": 0.2783897800310831, "learning_rate": 6.742131983061393e-06, "loss": 0.2145, "step": 7735 }, { "epoch": 0.6175461004230861, "grad_norm": 0.27808626100302003, "learning_rate": 6.739687686081216e-06, "loss": 0.183, "step": 7736 }, { "epoch": 0.6176259279955296, "grad_norm": 0.34813015478473414, "learning_rate": 6.737243607040014e-06, "loss": 0.252, "step": 7737 }, { "epoch": 0.6177057555679731, "grad_norm": 0.272805876812613, "learning_rate": 6.73479974610116e-06, "loss": 0.1519, "step": 7738 }, { "epoch": 0.6177855831404166, "grad_norm": 0.2712388821924525, "learning_rate": 6.7323561034280284e-06, "loss": 0.129, "step": 7739 }, { "epoch": 0.6178654107128603, "grad_norm": 0.27988396617067135, "learning_rate": 6.729912679183952e-06, "loss": 0.1423, "step": 7740 }, { "epoch": 0.6179452382853038, "grad_norm": 0.2886209948650126, "learning_rate": 6.72746947353228e-06, "loss": 0.1965, "step": 7741 }, { "epoch": 0.6180250658577473, "grad_norm": 0.3049713727224393, "learning_rate": 6.725026486636315e-06, "loss": 0.1316, "step": 7742 }, { "epoch": 0.6181048934301908, "grad_norm": 0.27923563872755264, "learning_rate": 6.722583718659376e-06, "loss": 0.1672, "step": 7743 }, { "epoch": 0.6181847210026343, "grad_norm": 0.25764868284878883, "learning_rate": 6.720141169764738e-06, "loss": 0.1508, "step": 7744 }, { "epoch": 0.6182645485750778, "grad_norm": 0.2738844688956949, "learning_rate": 6.7176988401156896e-06, "loss": 0.1479, "step": 7745 }, { "epoch": 0.6183443761475214, "grad_norm": 0.26979309762635917, "learning_rate": 6.715256729875483e-06, "loss": 0.1881, "step": 7746 }, { "epoch": 0.6184242037199649, "grad_norm": 0.2632862288640002, "learning_rate": 6.712814839207363e-06, "loss": 0.18, "step": 7747 }, { "epoch": 0.6185040312924084, "grad_norm": 0.26514574249396977, "learning_rate": 6.7103731682745634e-06, "loss": 0.1743, "step": 7748 }, { "epoch": 0.6185838588648519, "grad_norm": 0.3334786079632278, "learning_rate": 6.707931717240294e-06, "loss": 0.1885, "step": 7749 }, { "epoch": 0.6186636864372954, "grad_norm": 0.3010989972326368, "learning_rate": 6.705490486267769e-06, "loss": 0.1987, "step": 7750 }, { "epoch": 0.6187435140097389, "grad_norm": 0.32744850053957486, "learning_rate": 6.703049475520161e-06, "loss": 0.1853, "step": 7751 }, { "epoch": 0.6188233415821824, "grad_norm": 0.33145685645112727, "learning_rate": 6.700608685160654e-06, "loss": 0.1896, "step": 7752 }, { "epoch": 0.618903169154626, "grad_norm": 0.2736129700143059, "learning_rate": 6.698168115352397e-06, "loss": 0.177, "step": 7753 }, { "epoch": 0.6189829967270696, "grad_norm": 0.27520786992895446, "learning_rate": 6.695727766258533e-06, "loss": 0.1918, "step": 7754 }, { "epoch": 0.6190628242995131, "grad_norm": 0.3042503762780414, "learning_rate": 6.693287638042194e-06, "loss": 0.1424, "step": 7755 }, { "epoch": 0.6191426518719566, "grad_norm": 0.3018442761273215, "learning_rate": 6.690847730866487e-06, "loss": 0.1284, "step": 7756 }, { "epoch": 0.6192224794444001, "grad_norm": 0.3094274446709479, "learning_rate": 6.688408044894517e-06, "loss": 0.1925, "step": 7757 }, { "epoch": 0.6193023070168436, "grad_norm": 0.2690300515074372, "learning_rate": 6.68596858028936e-06, "loss": 0.1541, "step": 7758 }, { "epoch": 0.6193821345892871, "grad_norm": 0.290611095297971, "learning_rate": 6.683529337214092e-06, "loss": 0.1741, "step": 7759 }, { "epoch": 0.6194619621617307, "grad_norm": 0.2995940994426038, "learning_rate": 6.681090315831762e-06, "loss": 0.1293, "step": 7760 }, { "epoch": 0.6195417897341742, "grad_norm": 0.25719390511743095, "learning_rate": 6.678651516305409e-06, "loss": 0.1301, "step": 7761 }, { "epoch": 0.6196216173066177, "grad_norm": 0.3303317031176744, "learning_rate": 6.676212938798059e-06, "loss": 0.1653, "step": 7762 }, { "epoch": 0.6197014448790612, "grad_norm": 0.27521855651824434, "learning_rate": 6.67377458347272e-06, "loss": 0.1761, "step": 7763 }, { "epoch": 0.6197812724515047, "grad_norm": 0.34165077360011337, "learning_rate": 6.671336450492387e-06, "loss": 0.1685, "step": 7764 }, { "epoch": 0.6198611000239482, "grad_norm": 0.37065167751565226, "learning_rate": 6.668898540020039e-06, "loss": 0.1556, "step": 7765 }, { "epoch": 0.6199409275963917, "grad_norm": 0.3139513474184086, "learning_rate": 6.666460852218642e-06, "loss": 0.1878, "step": 7766 }, { "epoch": 0.6200207551688354, "grad_norm": 0.2891067298815069, "learning_rate": 6.664023387251146e-06, "loss": 0.1361, "step": 7767 }, { "epoch": 0.6201005827412789, "grad_norm": 0.258728035327938, "learning_rate": 6.6615861452804805e-06, "loss": 0.1411, "step": 7768 }, { "epoch": 0.6201804103137224, "grad_norm": 0.3127119755598124, "learning_rate": 6.659149126469574e-06, "loss": 0.1512, "step": 7769 }, { "epoch": 0.6202602378861659, "grad_norm": 0.26969612972364426, "learning_rate": 6.656712330981324e-06, "loss": 0.216, "step": 7770 }, { "epoch": 0.6203400654586094, "grad_norm": 0.3041457322298914, "learning_rate": 6.654275758978626e-06, "loss": 0.1392, "step": 7771 }, { "epoch": 0.6204198930310529, "grad_norm": 0.2715715191689654, "learning_rate": 6.651839410624352e-06, "loss": 0.1227, "step": 7772 }, { "epoch": 0.6204997206034965, "grad_norm": 0.3190223082349311, "learning_rate": 6.649403286081364e-06, "loss": 0.1657, "step": 7773 }, { "epoch": 0.62057954817594, "grad_norm": 0.2992890852950811, "learning_rate": 6.64696738551251e-06, "loss": 0.1456, "step": 7774 }, { "epoch": 0.6206593757483835, "grad_norm": 0.27047256934405134, "learning_rate": 6.644531709080608e-06, "loss": 0.1262, "step": 7775 }, { "epoch": 0.620739203320827, "grad_norm": 0.2971057859180975, "learning_rate": 6.642096256948492e-06, "loss": 0.1984, "step": 7776 }, { "epoch": 0.6208190308932705, "grad_norm": 0.27002731652134987, "learning_rate": 6.639661029278944e-06, "loss": 0.1642, "step": 7777 }, { "epoch": 0.620898858465714, "grad_norm": 0.30478033492461004, "learning_rate": 6.6372260262347636e-06, "loss": 0.194, "step": 7778 }, { "epoch": 0.6209786860381575, "grad_norm": 0.2937295225062732, "learning_rate": 6.6347912479787115e-06, "loss": 0.1718, "step": 7779 }, { "epoch": 0.621058513610601, "grad_norm": 0.2926384640602849, "learning_rate": 6.6323566946735505e-06, "loss": 0.1764, "step": 7780 }, { "epoch": 0.6211383411830447, "grad_norm": 0.30670321219861746, "learning_rate": 6.629922366482014e-06, "loss": 0.1684, "step": 7781 }, { "epoch": 0.6212181687554882, "grad_norm": 0.2783259313548897, "learning_rate": 6.627488263566834e-06, "loss": 0.1536, "step": 7782 }, { "epoch": 0.6212979963279317, "grad_norm": 0.2997812827189841, "learning_rate": 6.625054386090719e-06, "loss": 0.1769, "step": 7783 }, { "epoch": 0.6213778239003752, "grad_norm": 0.30279370886590223, "learning_rate": 6.622620734216355e-06, "loss": 0.1779, "step": 7784 }, { "epoch": 0.6214576514728187, "grad_norm": 0.3224876757314385, "learning_rate": 6.620187308106436e-06, "loss": 0.2047, "step": 7785 }, { "epoch": 0.6215374790452622, "grad_norm": 0.3034426470848871, "learning_rate": 6.617754107923613e-06, "loss": 0.2135, "step": 7786 }, { "epoch": 0.6216173066177058, "grad_norm": 0.3020696467360019, "learning_rate": 6.61532113383055e-06, "loss": 0.1615, "step": 7787 }, { "epoch": 0.6216971341901493, "grad_norm": 0.26903517851699726, "learning_rate": 6.6128883859898685e-06, "loss": 0.1008, "step": 7788 }, { "epoch": 0.6217769617625928, "grad_norm": 0.28228078442454424, "learning_rate": 6.610455864564201e-06, "loss": 0.1688, "step": 7789 }, { "epoch": 0.6218567893350363, "grad_norm": 0.2951649208681467, "learning_rate": 6.608023569716142e-06, "loss": 0.1466, "step": 7790 }, { "epoch": 0.6219366169074798, "grad_norm": 0.3494452036522662, "learning_rate": 6.605591501608282e-06, "loss": 0.1539, "step": 7791 }, { "epoch": 0.6220164444799233, "grad_norm": 0.27153817435411953, "learning_rate": 6.603159660403199e-06, "loss": 0.1829, "step": 7792 }, { "epoch": 0.6220962720523668, "grad_norm": 0.3258059268563136, "learning_rate": 6.600728046263447e-06, "loss": 0.1497, "step": 7793 }, { "epoch": 0.6221760996248105, "grad_norm": 0.2826633436651887, "learning_rate": 6.598296659351577e-06, "loss": 0.151, "step": 7794 }, { "epoch": 0.622255927197254, "grad_norm": 0.29894476723630425, "learning_rate": 6.595865499830108e-06, "loss": 0.1441, "step": 7795 }, { "epoch": 0.6223357547696975, "grad_norm": 0.28370708712312953, "learning_rate": 6.5934345678615655e-06, "loss": 0.1597, "step": 7796 }, { "epoch": 0.622415582342141, "grad_norm": 0.2666084791971368, "learning_rate": 6.591003863608438e-06, "loss": 0.2088, "step": 7797 }, { "epoch": 0.6224954099145845, "grad_norm": 0.2907774234786574, "learning_rate": 6.588573387233209e-06, "loss": 0.1787, "step": 7798 }, { "epoch": 0.622575237487028, "grad_norm": 0.33648883032388666, "learning_rate": 6.586143138898351e-06, "loss": 0.2179, "step": 7799 }, { "epoch": 0.6226550650594715, "grad_norm": 0.31780964662039124, "learning_rate": 6.583713118766313e-06, "loss": 0.1508, "step": 7800 }, { "epoch": 0.6227348926319151, "grad_norm": 0.27170271728300704, "learning_rate": 6.581283326999534e-06, "loss": 0.1461, "step": 7801 }, { "epoch": 0.6228147202043586, "grad_norm": 0.2541334922988484, "learning_rate": 6.578853763760435e-06, "loss": 0.1835, "step": 7802 }, { "epoch": 0.6228945477768021, "grad_norm": 0.32152472714413893, "learning_rate": 6.5764244292114235e-06, "loss": 0.147, "step": 7803 }, { "epoch": 0.6229743753492456, "grad_norm": 0.38121426519662877, "learning_rate": 6.57399532351489e-06, "loss": 0.1699, "step": 7804 }, { "epoch": 0.6230542029216891, "grad_norm": 0.30463909770999603, "learning_rate": 6.571566446833211e-06, "loss": 0.1648, "step": 7805 }, { "epoch": 0.6231340304941326, "grad_norm": 0.3021385901994211, "learning_rate": 6.569137799328748e-06, "loss": 0.1926, "step": 7806 }, { "epoch": 0.6232138580665761, "grad_norm": 0.2987255805142103, "learning_rate": 6.566709381163843e-06, "loss": 0.1698, "step": 7807 }, { "epoch": 0.6232936856390198, "grad_norm": 0.31058108028382564, "learning_rate": 6.564281192500832e-06, "loss": 0.1819, "step": 7808 }, { "epoch": 0.6233735132114633, "grad_norm": 0.3426862635909591, "learning_rate": 6.5618532335020245e-06, "loss": 0.1288, "step": 7809 }, { "epoch": 0.6234533407839068, "grad_norm": 0.32216024429900675, "learning_rate": 6.559425504329724e-06, "loss": 0.1738, "step": 7810 }, { "epoch": 0.6235331683563503, "grad_norm": 0.2995137231166494, "learning_rate": 6.556998005146211e-06, "loss": 0.1702, "step": 7811 }, { "epoch": 0.6236129959287938, "grad_norm": 0.2825366491136937, "learning_rate": 6.554570736113758e-06, "loss": 0.1753, "step": 7812 }, { "epoch": 0.6236928235012373, "grad_norm": 0.2747395787260299, "learning_rate": 6.552143697394615e-06, "loss": 0.1898, "step": 7813 }, { "epoch": 0.6237726510736809, "grad_norm": 0.26859208452643146, "learning_rate": 6.549716889151021e-06, "loss": 0.1818, "step": 7814 }, { "epoch": 0.6238524786461244, "grad_norm": 0.29833293629530666, "learning_rate": 6.5472903115452005e-06, "loss": 0.1769, "step": 7815 }, { "epoch": 0.6239323062185679, "grad_norm": 0.3053109040591716, "learning_rate": 6.5448639647393555e-06, "loss": 0.1664, "step": 7816 }, { "epoch": 0.6240121337910114, "grad_norm": 0.2919029377396456, "learning_rate": 6.542437848895684e-06, "loss": 0.1589, "step": 7817 }, { "epoch": 0.6240919613634549, "grad_norm": 0.25305977051491035, "learning_rate": 6.540011964176356e-06, "loss": 0.1702, "step": 7818 }, { "epoch": 0.6241717889358984, "grad_norm": 0.35048035346361145, "learning_rate": 6.53758631074354e-06, "loss": 0.1537, "step": 7819 }, { "epoch": 0.6242516165083419, "grad_norm": 0.29556792872525667, "learning_rate": 6.535160888759378e-06, "loss": 0.1392, "step": 7820 }, { "epoch": 0.6243314440807856, "grad_norm": 0.2832692222227855, "learning_rate": 6.532735698385991e-06, "loss": 0.1412, "step": 7821 }, { "epoch": 0.6244112716532291, "grad_norm": 0.29475077552258766, "learning_rate": 6.530310739785508e-06, "loss": 0.1764, "step": 7822 }, { "epoch": 0.6244910992256726, "grad_norm": 0.30537540830905074, "learning_rate": 6.527886013120016e-06, "loss": 0.1378, "step": 7823 }, { "epoch": 0.6245709267981161, "grad_norm": 0.28601946850773585, "learning_rate": 6.5254615185516065e-06, "loss": 0.1448, "step": 7824 }, { "epoch": 0.6246507543705596, "grad_norm": 0.30865383422704407, "learning_rate": 6.523037256242343e-06, "loss": 0.1823, "step": 7825 }, { "epoch": 0.6247305819430031, "grad_norm": 0.318732751880465, "learning_rate": 6.52061322635428e-06, "loss": 0.1934, "step": 7826 }, { "epoch": 0.6248104095154466, "grad_norm": 0.3053453140586429, "learning_rate": 6.518189429049458e-06, "loss": 0.1385, "step": 7827 }, { "epoch": 0.6248902370878902, "grad_norm": 0.2946441847804023, "learning_rate": 6.515765864489885e-06, "loss": 0.1486, "step": 7828 }, { "epoch": 0.6249700646603337, "grad_norm": 0.3269858245531055, "learning_rate": 6.513342532837581e-06, "loss": 0.1846, "step": 7829 }, { "epoch": 0.6250498922327772, "grad_norm": 0.2782692406946694, "learning_rate": 6.5109194342545255e-06, "loss": 0.1638, "step": 7830 }, { "epoch": 0.6251297198052207, "grad_norm": 0.3272433276381445, "learning_rate": 6.508496568902705e-06, "loss": 0.1285, "step": 7831 }, { "epoch": 0.6252095473776642, "grad_norm": 0.272164737785981, "learning_rate": 6.506073936944064e-06, "loss": 0.1887, "step": 7832 }, { "epoch": 0.6252893749501077, "grad_norm": 0.25315665928496117, "learning_rate": 6.503651538540561e-06, "loss": 0.1698, "step": 7833 }, { "epoch": 0.6253692025225512, "grad_norm": 0.3242168106159073, "learning_rate": 6.5012293738541136e-06, "loss": 0.1919, "step": 7834 }, { "epoch": 0.6254490300949949, "grad_norm": 0.28294548139195014, "learning_rate": 6.498807443046633e-06, "loss": 0.1718, "step": 7835 }, { "epoch": 0.6255288576674384, "grad_norm": 0.3389038291423267, "learning_rate": 6.496385746280023e-06, "loss": 0.1463, "step": 7836 }, { "epoch": 0.6256086852398819, "grad_norm": 0.2661800895756085, "learning_rate": 6.493964283716158e-06, "loss": 0.1811, "step": 7837 }, { "epoch": 0.6256885128123254, "grad_norm": 0.2718081761781734, "learning_rate": 6.4915430555169065e-06, "loss": 0.1805, "step": 7838 }, { "epoch": 0.6257683403847689, "grad_norm": 0.2618960021139361, "learning_rate": 6.489122061844117e-06, "loss": 0.1417, "step": 7839 }, { "epoch": 0.6258481679572124, "grad_norm": 0.25212174238966917, "learning_rate": 6.486701302859625e-06, "loss": 0.1618, "step": 7840 }, { "epoch": 0.625927995529656, "grad_norm": 0.29273384173569766, "learning_rate": 6.484280778725247e-06, "loss": 0.1883, "step": 7841 }, { "epoch": 0.6260078231020995, "grad_norm": 0.2755860467500087, "learning_rate": 6.481860489602785e-06, "loss": 0.1514, "step": 7842 }, { "epoch": 0.626087650674543, "grad_norm": 0.2565310130968941, "learning_rate": 6.479440435654028e-06, "loss": 0.1486, "step": 7843 }, { "epoch": 0.6261674782469865, "grad_norm": 0.2837731071250057, "learning_rate": 6.477020617040741e-06, "loss": 0.2238, "step": 7844 }, { "epoch": 0.62624730581943, "grad_norm": 0.2881020536755051, "learning_rate": 6.4746010339246886e-06, "loss": 0.1447, "step": 7845 }, { "epoch": 0.6263271333918735, "grad_norm": 0.28295032310319695, "learning_rate": 6.472181686467603e-06, "loss": 0.145, "step": 7846 }, { "epoch": 0.626406960964317, "grad_norm": 0.3041661306442352, "learning_rate": 6.469762574831213e-06, "loss": 0.2259, "step": 7847 }, { "epoch": 0.6264867885367607, "grad_norm": 0.24875690098859363, "learning_rate": 6.467343699177222e-06, "loss": 0.1289, "step": 7848 }, { "epoch": 0.6265666161092042, "grad_norm": 0.28048707389218214, "learning_rate": 6.4649250596673265e-06, "loss": 0.1785, "step": 7849 }, { "epoch": 0.6266464436816477, "grad_norm": 0.4006194009287461, "learning_rate": 6.4625066564632e-06, "loss": 0.1721, "step": 7850 }, { "epoch": 0.6267262712540912, "grad_norm": 0.26356713039881463, "learning_rate": 6.460088489726503e-06, "loss": 0.1597, "step": 7851 }, { "epoch": 0.6268060988265347, "grad_norm": 0.3103431529865744, "learning_rate": 6.4576705596188826e-06, "loss": 0.1783, "step": 7852 }, { "epoch": 0.6268859263989782, "grad_norm": 0.3053448185185749, "learning_rate": 6.455252866301966e-06, "loss": 0.1393, "step": 7853 }, { "epoch": 0.6269657539714217, "grad_norm": 0.2849499234468462, "learning_rate": 6.452835409937369e-06, "loss": 0.146, "step": 7854 }, { "epoch": 0.6270455815438652, "grad_norm": 0.31137428980290827, "learning_rate": 6.450418190686685e-06, "loss": 0.19, "step": 7855 }, { "epoch": 0.6271254091163088, "grad_norm": 0.34798692764556266, "learning_rate": 6.448001208711501e-06, "loss": 0.1942, "step": 7856 }, { "epoch": 0.6272052366887523, "grad_norm": 0.2922141992479677, "learning_rate": 6.445584464173378e-06, "loss": 0.1856, "step": 7857 }, { "epoch": 0.6272850642611958, "grad_norm": 0.33354342743307097, "learning_rate": 6.443167957233864e-06, "loss": 0.1773, "step": 7858 }, { "epoch": 0.6273648918336393, "grad_norm": 0.30119526118721446, "learning_rate": 6.440751688054501e-06, "loss": 0.1455, "step": 7859 }, { "epoch": 0.6274447194060828, "grad_norm": 0.26085243899194716, "learning_rate": 6.438335656796798e-06, "loss": 0.1511, "step": 7860 }, { "epoch": 0.6275245469785263, "grad_norm": 0.2707437463118293, "learning_rate": 6.4359198636222645e-06, "loss": 0.1325, "step": 7861 }, { "epoch": 0.62760437455097, "grad_norm": 0.2602201696127452, "learning_rate": 6.433504308692381e-06, "loss": 0.1554, "step": 7862 }, { "epoch": 0.6276842021234135, "grad_norm": 0.36946266281326084, "learning_rate": 6.431088992168622e-06, "loss": 0.1569, "step": 7863 }, { "epoch": 0.627764029695857, "grad_norm": 0.2917920171934992, "learning_rate": 6.428673914212444e-06, "loss": 0.1829, "step": 7864 }, { "epoch": 0.6278438572683005, "grad_norm": 0.29595545121358796, "learning_rate": 6.426259074985274e-06, "loss": 0.1714, "step": 7865 }, { "epoch": 0.627923684840744, "grad_norm": 0.2951972820414369, "learning_rate": 6.423844474648548e-06, "loss": 0.1668, "step": 7866 }, { "epoch": 0.6280035124131875, "grad_norm": 0.35608510832280993, "learning_rate": 6.421430113363661e-06, "loss": 0.142, "step": 7867 }, { "epoch": 0.628083339985631, "grad_norm": 0.2652854124256633, "learning_rate": 6.4190159912920146e-06, "loss": 0.1405, "step": 7868 }, { "epoch": 0.6281631675580746, "grad_norm": 0.33601155451576387, "learning_rate": 6.416602108594972e-06, "loss": 0.1665, "step": 7869 }, { "epoch": 0.6282429951305181, "grad_norm": 0.27779819440252096, "learning_rate": 6.4141884654339014e-06, "loss": 0.1897, "step": 7870 }, { "epoch": 0.6283228227029616, "grad_norm": 0.27788739323131634, "learning_rate": 6.411775061970144e-06, "loss": 0.1566, "step": 7871 }, { "epoch": 0.6284026502754051, "grad_norm": 0.3226390013641138, "learning_rate": 6.409361898365017e-06, "loss": 0.1378, "step": 7872 }, { "epoch": 0.6284824778478486, "grad_norm": 0.29112788332097556, "learning_rate": 6.406948974779843e-06, "loss": 0.1635, "step": 7873 }, { "epoch": 0.6285623054202921, "grad_norm": 0.30395262298541226, "learning_rate": 6.4045362913759055e-06, "loss": 0.1584, "step": 7874 }, { "epoch": 0.6286421329927356, "grad_norm": 0.2930700519504859, "learning_rate": 6.402123848314494e-06, "loss": 0.1786, "step": 7875 }, { "epoch": 0.6287219605651793, "grad_norm": 0.2779971854733781, "learning_rate": 6.399711645756858e-06, "loss": 0.1563, "step": 7876 }, { "epoch": 0.6288017881376228, "grad_norm": 0.3232509668552497, "learning_rate": 6.397299683864258e-06, "loss": 0.1424, "step": 7877 }, { "epoch": 0.6288816157100663, "grad_norm": 0.3229653692692099, "learning_rate": 6.39488796279791e-06, "loss": 0.165, "step": 7878 }, { "epoch": 0.6289614432825098, "grad_norm": 0.2984581714066299, "learning_rate": 6.392476482719041e-06, "loss": 0.1986, "step": 7879 }, { "epoch": 0.6290412708549533, "grad_norm": 0.3167147670594716, "learning_rate": 6.390065243788841e-06, "loss": 0.1556, "step": 7880 }, { "epoch": 0.6291210984273968, "grad_norm": 0.27503766398148566, "learning_rate": 6.387654246168491e-06, "loss": 0.1732, "step": 7881 }, { "epoch": 0.6292009259998403, "grad_norm": 0.25910236759583694, "learning_rate": 6.385243490019161e-06, "loss": 0.1332, "step": 7882 }, { "epoch": 0.6292807535722839, "grad_norm": 0.2818807322362321, "learning_rate": 6.382832975501996e-06, "loss": 0.1444, "step": 7883 }, { "epoch": 0.6293605811447274, "grad_norm": 0.28745664506767804, "learning_rate": 6.380422702778135e-06, "loss": 0.1296, "step": 7884 }, { "epoch": 0.6294404087171709, "grad_norm": 0.279394355538255, "learning_rate": 6.378012672008689e-06, "loss": 0.2175, "step": 7885 }, { "epoch": 0.6295202362896144, "grad_norm": 0.3065264524348574, "learning_rate": 6.375602883354765e-06, "loss": 0.1671, "step": 7886 }, { "epoch": 0.6296000638620579, "grad_norm": 0.2865471101310031, "learning_rate": 6.3731933369774445e-06, "loss": 0.1671, "step": 7887 }, { "epoch": 0.6296798914345014, "grad_norm": 0.31124646657027855, "learning_rate": 6.3707840330377955e-06, "loss": 0.2201, "step": 7888 }, { "epoch": 0.629759719006945, "grad_norm": 0.25299576670872975, "learning_rate": 6.3683749716968725e-06, "loss": 0.1639, "step": 7889 }, { "epoch": 0.6298395465793886, "grad_norm": 0.2956485931302587, "learning_rate": 6.365966153115709e-06, "loss": 0.1426, "step": 7890 }, { "epoch": 0.6299193741518321, "grad_norm": 0.2777438762202138, "learning_rate": 6.363557577455329e-06, "loss": 0.1486, "step": 7891 }, { "epoch": 0.6299992017242756, "grad_norm": 0.2833679665921222, "learning_rate": 6.361149244876732e-06, "loss": 0.197, "step": 7892 }, { "epoch": 0.6300790292967191, "grad_norm": 0.2948088558214389, "learning_rate": 6.358741155540909e-06, "loss": 0.1863, "step": 7893 }, { "epoch": 0.6301588568691626, "grad_norm": 0.27985247503551797, "learning_rate": 6.356333309608831e-06, "loss": 0.1561, "step": 7894 }, { "epoch": 0.6302386844416061, "grad_norm": 0.3091204450305319, "learning_rate": 6.353925707241448e-06, "loss": 0.1466, "step": 7895 }, { "epoch": 0.6303185120140496, "grad_norm": 0.2803897909945065, "learning_rate": 6.351518348599705e-06, "loss": 0.1693, "step": 7896 }, { "epoch": 0.6303983395864932, "grad_norm": 0.2755503294109376, "learning_rate": 6.34911123384452e-06, "loss": 0.1521, "step": 7897 }, { "epoch": 0.6304781671589367, "grad_norm": 0.34181732938303816, "learning_rate": 6.346704363136804e-06, "loss": 0.1384, "step": 7898 }, { "epoch": 0.6305579947313802, "grad_norm": 0.31387836031533206, "learning_rate": 6.344297736637441e-06, "loss": 0.1321, "step": 7899 }, { "epoch": 0.6306378223038237, "grad_norm": 0.29515974299872816, "learning_rate": 6.341891354507308e-06, "loss": 0.1398, "step": 7900 }, { "epoch": 0.6307176498762672, "grad_norm": 0.30826355706989406, "learning_rate": 6.339485216907263e-06, "loss": 0.124, "step": 7901 }, { "epoch": 0.6307974774487107, "grad_norm": 0.26907492251347404, "learning_rate": 6.337079323998142e-06, "loss": 0.1684, "step": 7902 }, { "epoch": 0.6308773050211544, "grad_norm": 0.34111526566268835, "learning_rate": 6.334673675940775e-06, "loss": 0.1582, "step": 7903 }, { "epoch": 0.6309571325935979, "grad_norm": 0.32222890959618, "learning_rate": 6.332268272895965e-06, "loss": 0.2213, "step": 7904 }, { "epoch": 0.6310369601660414, "grad_norm": 0.2995060165220768, "learning_rate": 6.329863115024508e-06, "loss": 0.1808, "step": 7905 }, { "epoch": 0.6311167877384849, "grad_norm": 0.3226297642930607, "learning_rate": 6.327458202487176e-06, "loss": 0.1801, "step": 7906 }, { "epoch": 0.6311966153109284, "grad_norm": 0.2540201973207128, "learning_rate": 6.325053535444732e-06, "loss": 0.1419, "step": 7907 }, { "epoch": 0.6312764428833719, "grad_norm": 0.29602655373081554, "learning_rate": 6.322649114057917e-06, "loss": 0.2282, "step": 7908 }, { "epoch": 0.6313562704558154, "grad_norm": 0.28943607712851027, "learning_rate": 6.32024493848745e-06, "loss": 0.1489, "step": 7909 }, { "epoch": 0.631436098028259, "grad_norm": 0.26791392533150027, "learning_rate": 6.317841008894052e-06, "loss": 0.0933, "step": 7910 }, { "epoch": 0.6315159256007025, "grad_norm": 0.2816582140725664, "learning_rate": 6.315437325438405e-06, "loss": 0.1433, "step": 7911 }, { "epoch": 0.631595753173146, "grad_norm": 0.3178588495185634, "learning_rate": 6.313033888281197e-06, "loss": 0.1768, "step": 7912 }, { "epoch": 0.6316755807455895, "grad_norm": 0.3149705072254681, "learning_rate": 6.310630697583078e-06, "loss": 0.2106, "step": 7913 }, { "epoch": 0.631755408318033, "grad_norm": 0.287908944158742, "learning_rate": 6.308227753504702e-06, "loss": 0.1372, "step": 7914 }, { "epoch": 0.6318352358904765, "grad_norm": 0.27731353454429, "learning_rate": 6.305825056206685e-06, "loss": 0.1517, "step": 7915 }, { "epoch": 0.6319150634629201, "grad_norm": 0.27172022102153875, "learning_rate": 6.303422605849647e-06, "loss": 0.1384, "step": 7916 }, { "epoch": 0.6319948910353637, "grad_norm": 0.26009920060030606, "learning_rate": 6.3010204025941805e-06, "loss": 0.1738, "step": 7917 }, { "epoch": 0.6320747186078072, "grad_norm": 0.331415747276695, "learning_rate": 6.298618446600856e-06, "loss": 0.1836, "step": 7918 }, { "epoch": 0.6321545461802507, "grad_norm": 0.27724593291086613, "learning_rate": 6.296216738030248e-06, "loss": 0.1613, "step": 7919 }, { "epoch": 0.6322343737526942, "grad_norm": 0.28316244279087727, "learning_rate": 6.293815277042885e-06, "loss": 0.1945, "step": 7920 }, { "epoch": 0.6323142013251377, "grad_norm": 0.2925756183112349, "learning_rate": 6.291414063799311e-06, "loss": 0.156, "step": 7921 }, { "epoch": 0.6323940288975812, "grad_norm": 0.2890880118653985, "learning_rate": 6.289013098460024e-06, "loss": 0.1692, "step": 7922 }, { "epoch": 0.6324738564700247, "grad_norm": 0.2655552521652515, "learning_rate": 6.286612381185533e-06, "loss": 0.1235, "step": 7923 }, { "epoch": 0.6325536840424683, "grad_norm": 0.24828874835123957, "learning_rate": 6.2842119121363045e-06, "loss": 0.1722, "step": 7924 }, { "epoch": 0.6326335116149118, "grad_norm": 0.29629076317448483, "learning_rate": 6.281811691472804e-06, "loss": 0.169, "step": 7925 }, { "epoch": 0.6327133391873553, "grad_norm": 0.2620029225686588, "learning_rate": 6.27941171935548e-06, "loss": 0.1638, "step": 7926 }, { "epoch": 0.6327931667597988, "grad_norm": 0.25543896027641405, "learning_rate": 6.277011995944755e-06, "loss": 0.1077, "step": 7927 }, { "epoch": 0.6328729943322423, "grad_norm": 0.2735678194740406, "learning_rate": 6.274612521401049e-06, "loss": 0.155, "step": 7928 }, { "epoch": 0.6329528219046858, "grad_norm": 0.27957449200081297, "learning_rate": 6.272213295884749e-06, "loss": 0.1845, "step": 7929 }, { "epoch": 0.6330326494771294, "grad_norm": 0.29594388652648124, "learning_rate": 6.26981431955624e-06, "loss": 0.1887, "step": 7930 }, { "epoch": 0.633112477049573, "grad_norm": 0.4546275924091171, "learning_rate": 6.267415592575883e-06, "loss": 0.1497, "step": 7931 }, { "epoch": 0.6331923046220165, "grad_norm": 0.3390713624242323, "learning_rate": 6.2650171151040195e-06, "loss": 0.1965, "step": 7932 }, { "epoch": 0.63327213219446, "grad_norm": 0.2861016180177078, "learning_rate": 6.262618887300983e-06, "loss": 0.1729, "step": 7933 }, { "epoch": 0.6333519597669035, "grad_norm": 0.2913163046127994, "learning_rate": 6.260220909327082e-06, "loss": 0.1717, "step": 7934 }, { "epoch": 0.633431787339347, "grad_norm": 0.3408618032778429, "learning_rate": 6.2578231813426136e-06, "loss": 0.199, "step": 7935 }, { "epoch": 0.6335116149117905, "grad_norm": 0.3176545081431736, "learning_rate": 6.2554257035078556e-06, "loss": 0.1491, "step": 7936 }, { "epoch": 0.633591442484234, "grad_norm": 0.31227415433150973, "learning_rate": 6.253028475983072e-06, "loss": 0.1515, "step": 7937 }, { "epoch": 0.6336712700566776, "grad_norm": 0.3290321642850945, "learning_rate": 6.250631498928507e-06, "loss": 0.1788, "step": 7938 }, { "epoch": 0.6337510976291211, "grad_norm": 0.2784825298237532, "learning_rate": 6.248234772504385e-06, "loss": 0.1533, "step": 7939 }, { "epoch": 0.6338309252015646, "grad_norm": 0.3082292605205891, "learning_rate": 6.245838296870926e-06, "loss": 0.1545, "step": 7940 }, { "epoch": 0.6339107527740081, "grad_norm": 0.31659454811858057, "learning_rate": 6.243442072188315e-06, "loss": 0.13, "step": 7941 }, { "epoch": 0.6339905803464516, "grad_norm": 0.2734553956007434, "learning_rate": 6.2410460986167384e-06, "loss": 0.1872, "step": 7942 }, { "epoch": 0.6340704079188952, "grad_norm": 0.30961549592261767, "learning_rate": 6.238650376316353e-06, "loss": 0.1861, "step": 7943 }, { "epoch": 0.6341502354913388, "grad_norm": 0.2970372242406895, "learning_rate": 6.2362549054473055e-06, "loss": 0.1468, "step": 7944 }, { "epoch": 0.6342300630637823, "grad_norm": 0.2705402976909534, "learning_rate": 6.233859686169721e-06, "loss": 0.1758, "step": 7945 }, { "epoch": 0.6343098906362258, "grad_norm": 0.305412714896584, "learning_rate": 6.231464718643716e-06, "loss": 0.1659, "step": 7946 }, { "epoch": 0.6343897182086693, "grad_norm": 0.2692033379172832, "learning_rate": 6.229070003029378e-06, "loss": 0.137, "step": 7947 }, { "epoch": 0.6344695457811128, "grad_norm": 0.2564155422359042, "learning_rate": 6.2266755394867885e-06, "loss": 0.1747, "step": 7948 }, { "epoch": 0.6345493733535563, "grad_norm": 0.30402995762751917, "learning_rate": 6.224281328176005e-06, "loss": 0.1726, "step": 7949 }, { "epoch": 0.6346292009259998, "grad_norm": 0.26201742032989855, "learning_rate": 6.2218873692570736e-06, "loss": 0.1511, "step": 7950 }, { "epoch": 0.6347090284984434, "grad_norm": 0.3109988508204117, "learning_rate": 6.21949366289002e-06, "loss": 0.1889, "step": 7951 }, { "epoch": 0.6347888560708869, "grad_norm": 0.27997384516840357, "learning_rate": 6.217100209234853e-06, "loss": 0.1489, "step": 7952 }, { "epoch": 0.6348686836433304, "grad_norm": 0.29872875627476037, "learning_rate": 6.2147070084515685e-06, "loss": 0.1847, "step": 7953 }, { "epoch": 0.6349485112157739, "grad_norm": 0.2355550720866576, "learning_rate": 6.212314060700142e-06, "loss": 0.196, "step": 7954 }, { "epoch": 0.6350283387882174, "grad_norm": 0.31039549040541586, "learning_rate": 6.2099213661405256e-06, "loss": 0.1507, "step": 7955 }, { "epoch": 0.6351081663606609, "grad_norm": 0.34099001061942813, "learning_rate": 6.207528924932672e-06, "loss": 0.2004, "step": 7956 }, { "epoch": 0.6351879939331045, "grad_norm": 0.2897154760263863, "learning_rate": 6.205136737236496e-06, "loss": 0.1707, "step": 7957 }, { "epoch": 0.6352678215055481, "grad_norm": 0.27740850142399615, "learning_rate": 6.202744803211917e-06, "loss": 0.1597, "step": 7958 }, { "epoch": 0.6353476490779916, "grad_norm": 0.3120255703231068, "learning_rate": 6.200353123018814e-06, "loss": 0.136, "step": 7959 }, { "epoch": 0.6354274766504351, "grad_norm": 0.2719888984467714, "learning_rate": 6.197961696817076e-06, "loss": 0.1707, "step": 7960 }, { "epoch": 0.6355073042228786, "grad_norm": 0.3080478538967973, "learning_rate": 6.195570524766548e-06, "loss": 0.1769, "step": 7961 }, { "epoch": 0.6355871317953221, "grad_norm": 0.32193169400461796, "learning_rate": 6.193179607027071e-06, "loss": 0.159, "step": 7962 }, { "epoch": 0.6356669593677656, "grad_norm": 0.2955242751415057, "learning_rate": 6.190788943758479e-06, "loss": 0.1292, "step": 7963 }, { "epoch": 0.6357467869402091, "grad_norm": 0.31048447893547904, "learning_rate": 6.188398535120565e-06, "loss": 0.1595, "step": 7964 }, { "epoch": 0.6358266145126527, "grad_norm": 0.2953215642513078, "learning_rate": 6.186008381273132e-06, "loss": 0.1712, "step": 7965 }, { "epoch": 0.6359064420850962, "grad_norm": 0.32187046341517356, "learning_rate": 6.183618482375937e-06, "loss": 0.1726, "step": 7966 }, { "epoch": 0.6359862696575397, "grad_norm": 0.311831714639984, "learning_rate": 6.181228838588751e-06, "loss": 0.1573, "step": 7967 }, { "epoch": 0.6360660972299832, "grad_norm": 0.3315368634888326, "learning_rate": 6.178839450071302e-06, "loss": 0.125, "step": 7968 }, { "epoch": 0.6361459248024267, "grad_norm": 0.33371199520362205, "learning_rate": 6.176450316983312e-06, "loss": 0.1285, "step": 7969 }, { "epoch": 0.6362257523748703, "grad_norm": 0.33193464147092555, "learning_rate": 6.174061439484489e-06, "loss": 0.161, "step": 7970 }, { "epoch": 0.6363055799473138, "grad_norm": 0.28600100285895885, "learning_rate": 6.171672817734515e-06, "loss": 0.1495, "step": 7971 }, { "epoch": 0.6363854075197574, "grad_norm": 0.33806654169233147, "learning_rate": 6.1692844518930675e-06, "loss": 0.1611, "step": 7972 }, { "epoch": 0.6364652350922009, "grad_norm": 0.2553279340067126, "learning_rate": 6.166896342119791e-06, "loss": 0.1617, "step": 7973 }, { "epoch": 0.6365450626646444, "grad_norm": 0.2829548872715143, "learning_rate": 6.164508488574326e-06, "loss": 0.1458, "step": 7974 }, { "epoch": 0.6366248902370879, "grad_norm": 0.251250701805331, "learning_rate": 6.162120891416287e-06, "loss": 0.1776, "step": 7975 }, { "epoch": 0.6367047178095314, "grad_norm": 0.32877405178303054, "learning_rate": 6.159733550805282e-06, "loss": 0.1483, "step": 7976 }, { "epoch": 0.6367845453819749, "grad_norm": 0.28092610148663705, "learning_rate": 6.157346466900892e-06, "loss": 0.1922, "step": 7977 }, { "epoch": 0.6368643729544184, "grad_norm": 0.280759455293686, "learning_rate": 6.154959639862681e-06, "loss": 0.164, "step": 7978 }, { "epoch": 0.636944200526862, "grad_norm": 0.2899150637825831, "learning_rate": 6.152573069850203e-06, "loss": 0.1714, "step": 7979 }, { "epoch": 0.6370240280993055, "grad_norm": 0.37723586619571486, "learning_rate": 6.150186757022989e-06, "loss": 0.2115, "step": 7980 }, { "epoch": 0.637103855671749, "grad_norm": 0.3514384985206014, "learning_rate": 6.147800701540556e-06, "loss": 0.2054, "step": 7981 }, { "epoch": 0.6371836832441925, "grad_norm": 0.25411251111576727, "learning_rate": 6.1454149035623986e-06, "loss": 0.1515, "step": 7982 }, { "epoch": 0.637263510816636, "grad_norm": 0.30532901194572903, "learning_rate": 6.143029363248003e-06, "loss": 0.1197, "step": 7983 }, { "epoch": 0.6373433383890796, "grad_norm": 0.2731484263491166, "learning_rate": 6.140644080756832e-06, "loss": 0.127, "step": 7984 }, { "epoch": 0.6374231659615232, "grad_norm": 0.3055417380967953, "learning_rate": 6.138259056248328e-06, "loss": 0.1234, "step": 7985 }, { "epoch": 0.6375029935339667, "grad_norm": 0.3101880133262638, "learning_rate": 6.135874289881927e-06, "loss": 0.1519, "step": 7986 }, { "epoch": 0.6375828211064102, "grad_norm": 0.32119893729524995, "learning_rate": 6.1334897818170335e-06, "loss": 0.1281, "step": 7987 }, { "epoch": 0.6376626486788537, "grad_norm": 0.2709152460992918, "learning_rate": 6.13110553221305e-06, "loss": 0.1721, "step": 7988 }, { "epoch": 0.6377424762512972, "grad_norm": 0.34963343447740025, "learning_rate": 6.128721541229348e-06, "loss": 0.203, "step": 7989 }, { "epoch": 0.6378223038237407, "grad_norm": 0.3395392412775255, "learning_rate": 6.126337809025294e-06, "loss": 0.2471, "step": 7990 }, { "epoch": 0.6379021313961842, "grad_norm": 0.3530077744469661, "learning_rate": 6.1239543357602275e-06, "loss": 0.1757, "step": 7991 }, { "epoch": 0.6379819589686277, "grad_norm": 0.34508272638995346, "learning_rate": 6.121571121593472e-06, "loss": 0.1626, "step": 7992 }, { "epoch": 0.6380617865410713, "grad_norm": 0.2975713249768821, "learning_rate": 6.11918816668434e-06, "loss": 0.1364, "step": 7993 }, { "epoch": 0.6381416141135148, "grad_norm": 0.3068852393208346, "learning_rate": 6.11680547119212e-06, "loss": 0.1465, "step": 7994 }, { "epoch": 0.6382214416859583, "grad_norm": 0.26016947327752615, "learning_rate": 6.114423035276087e-06, "loss": 0.1708, "step": 7995 }, { "epoch": 0.6383012692584018, "grad_norm": 0.2719230066364622, "learning_rate": 6.112040859095497e-06, "loss": 0.1432, "step": 7996 }, { "epoch": 0.6383810968308454, "grad_norm": 0.3257702031783222, "learning_rate": 6.109658942809591e-06, "loss": 0.2416, "step": 7997 }, { "epoch": 0.6384609244032889, "grad_norm": 0.30679812077610746, "learning_rate": 6.107277286577591e-06, "loss": 0.1588, "step": 7998 }, { "epoch": 0.6385407519757325, "grad_norm": 0.37026247488982933, "learning_rate": 6.104895890558693e-06, "loss": 0.1885, "step": 7999 }, { "epoch": 0.638620579548176, "grad_norm": 0.29255030776951324, "learning_rate": 6.102514754912096e-06, "loss": 0.1723, "step": 8000 }, { "epoch": 0.6387004071206195, "grad_norm": 0.3000823975970797, "learning_rate": 6.100133879796956e-06, "loss": 0.1224, "step": 8001 }, { "epoch": 0.638780234693063, "grad_norm": 0.32894253503927656, "learning_rate": 6.097753265372442e-06, "loss": 0.1498, "step": 8002 }, { "epoch": 0.6388600622655065, "grad_norm": 0.33548653371616427, "learning_rate": 6.095372911797673e-06, "loss": 0.1871, "step": 8003 }, { "epoch": 0.63893988983795, "grad_norm": 0.36600249492671416, "learning_rate": 6.092992819231776e-06, "loss": 0.1901, "step": 8004 }, { "epoch": 0.6390197174103935, "grad_norm": 0.3343221605773924, "learning_rate": 6.090612987833847e-06, "loss": 0.135, "step": 8005 }, { "epoch": 0.639099544982837, "grad_norm": 0.286924536290849, "learning_rate": 6.088233417762967e-06, "loss": 0.1562, "step": 8006 }, { "epoch": 0.6391793725552806, "grad_norm": 0.28945142416708414, "learning_rate": 6.085854109178204e-06, "loss": 0.1844, "step": 8007 }, { "epoch": 0.6392592001277241, "grad_norm": 0.29934430370202897, "learning_rate": 6.0834750622386e-06, "loss": 0.1327, "step": 8008 }, { "epoch": 0.6393390277001676, "grad_norm": 0.3487692290890535, "learning_rate": 6.081096277103193e-06, "loss": 0.1519, "step": 8009 }, { "epoch": 0.6394188552726111, "grad_norm": 0.2578646692834321, "learning_rate": 6.078717753930987e-06, "loss": 0.1797, "step": 8010 }, { "epoch": 0.6394986828450547, "grad_norm": 0.30192367616206034, "learning_rate": 6.076339492880987e-06, "loss": 0.1706, "step": 8011 }, { "epoch": 0.6395785104174982, "grad_norm": 0.2681492081410076, "learning_rate": 6.073961494112159e-06, "loss": 0.2044, "step": 8012 }, { "epoch": 0.6396583379899418, "grad_norm": 0.2870492982294218, "learning_rate": 6.071583757783472e-06, "loss": 0.1684, "step": 8013 }, { "epoch": 0.6397381655623853, "grad_norm": 0.2723445493164569, "learning_rate": 6.069206284053864e-06, "loss": 0.1608, "step": 8014 }, { "epoch": 0.6398179931348288, "grad_norm": 0.2985143116163884, "learning_rate": 6.066829073082259e-06, "loss": 0.1629, "step": 8015 }, { "epoch": 0.6398978207072723, "grad_norm": 0.30156761713099145, "learning_rate": 6.064452125027564e-06, "loss": 0.1245, "step": 8016 }, { "epoch": 0.6399776482797158, "grad_norm": 0.28707650941911295, "learning_rate": 6.062075440048671e-06, "loss": 0.1595, "step": 8017 }, { "epoch": 0.6400574758521593, "grad_norm": 0.3199522309443021, "learning_rate": 6.059699018304451e-06, "loss": 0.1748, "step": 8018 }, { "epoch": 0.6401373034246028, "grad_norm": 0.29463347432878917, "learning_rate": 6.057322859953757e-06, "loss": 0.1929, "step": 8019 }, { "epoch": 0.6402171309970464, "grad_norm": 0.3220791128526782, "learning_rate": 6.054946965155428e-06, "loss": 0.172, "step": 8020 }, { "epoch": 0.6402969585694899, "grad_norm": 0.27647063764098345, "learning_rate": 6.052571334068283e-06, "loss": 0.1843, "step": 8021 }, { "epoch": 0.6403767861419334, "grad_norm": 0.2623195881434972, "learning_rate": 6.05019596685112e-06, "loss": 0.1814, "step": 8022 }, { "epoch": 0.6404566137143769, "grad_norm": 0.2577586018204231, "learning_rate": 6.047820863662729e-06, "loss": 0.1873, "step": 8023 }, { "epoch": 0.6405364412868204, "grad_norm": 0.2670361715474913, "learning_rate": 6.04544602466187e-06, "loss": 0.1751, "step": 8024 }, { "epoch": 0.640616268859264, "grad_norm": 0.28414380289500896, "learning_rate": 6.0430714500072965e-06, "loss": 0.1591, "step": 8025 }, { "epoch": 0.6406960964317076, "grad_norm": 0.31179648906273905, "learning_rate": 6.040697139857734e-06, "loss": 0.1327, "step": 8026 }, { "epoch": 0.6407759240041511, "grad_norm": 0.2864780227769379, "learning_rate": 6.038323094371903e-06, "loss": 0.1896, "step": 8027 }, { "epoch": 0.6408557515765946, "grad_norm": 0.2998800279750208, "learning_rate": 6.0359493137084945e-06, "loss": 0.1419, "step": 8028 }, { "epoch": 0.6409355791490381, "grad_norm": 0.256422160455758, "learning_rate": 6.033575798026184e-06, "loss": 0.1638, "step": 8029 }, { "epoch": 0.6410154067214816, "grad_norm": 0.30132646621739795, "learning_rate": 6.031202547483638e-06, "loss": 0.1514, "step": 8030 }, { "epoch": 0.6410952342939251, "grad_norm": 0.36127700050924194, "learning_rate": 6.0288295622394936e-06, "loss": 0.2067, "step": 8031 }, { "epoch": 0.6411750618663686, "grad_norm": 0.27020103730171524, "learning_rate": 6.026456842452377e-06, "loss": 0.1148, "step": 8032 }, { "epoch": 0.6412548894388121, "grad_norm": 0.25551572910953896, "learning_rate": 6.024084388280895e-06, "loss": 0.1503, "step": 8033 }, { "epoch": 0.6413347170112557, "grad_norm": 0.28750600944795834, "learning_rate": 6.02171219988364e-06, "loss": 0.1889, "step": 8034 }, { "epoch": 0.6414145445836992, "grad_norm": 0.33149654074744334, "learning_rate": 6.019340277419181e-06, "loss": 0.1651, "step": 8035 }, { "epoch": 0.6414943721561427, "grad_norm": 0.2596753950861568, "learning_rate": 6.016968621046067e-06, "loss": 0.1172, "step": 8036 }, { "epoch": 0.6415741997285862, "grad_norm": 0.33849419298642647, "learning_rate": 6.014597230922843e-06, "loss": 0.1292, "step": 8037 }, { "epoch": 0.6416540273010298, "grad_norm": 0.3035241245109077, "learning_rate": 6.012226107208018e-06, "loss": 0.187, "step": 8038 }, { "epoch": 0.6417338548734733, "grad_norm": 0.28817110130680135, "learning_rate": 6.0098552500600995e-06, "loss": 0.1771, "step": 8039 }, { "epoch": 0.6418136824459169, "grad_norm": 0.28378942548837, "learning_rate": 6.007484659637563e-06, "loss": 0.1487, "step": 8040 }, { "epoch": 0.6418935100183604, "grad_norm": 0.32506117333847306, "learning_rate": 6.005114336098882e-06, "loss": 0.1777, "step": 8041 }, { "epoch": 0.6419733375908039, "grad_norm": 0.3281489064970222, "learning_rate": 6.002744279602495e-06, "loss": 0.1843, "step": 8042 }, { "epoch": 0.6420531651632474, "grad_norm": 0.33740457007435115, "learning_rate": 6.000374490306835e-06, "loss": 0.1647, "step": 8043 }, { "epoch": 0.6421329927356909, "grad_norm": 0.27575471981143007, "learning_rate": 5.9980049683703146e-06, "loss": 0.1344, "step": 8044 }, { "epoch": 0.6422128203081344, "grad_norm": 0.2614443803715684, "learning_rate": 5.995635713951318e-06, "loss": 0.118, "step": 8045 }, { "epoch": 0.6422926478805779, "grad_norm": 0.31909608736626033, "learning_rate": 5.993266727208234e-06, "loss": 0.1607, "step": 8046 }, { "epoch": 0.6423724754530215, "grad_norm": 0.24887441883925399, "learning_rate": 5.990898008299406e-06, "loss": 0.1932, "step": 8047 }, { "epoch": 0.642452303025465, "grad_norm": 0.2958440299539491, "learning_rate": 5.988529557383187e-06, "loss": 0.1644, "step": 8048 }, { "epoch": 0.6425321305979085, "grad_norm": 0.2583504043645402, "learning_rate": 5.986161374617887e-06, "loss": 0.2135, "step": 8049 }, { "epoch": 0.642611958170352, "grad_norm": 0.286666813620616, "learning_rate": 5.9837934601618195e-06, "loss": 0.1486, "step": 8050 }, { "epoch": 0.6426917857427955, "grad_norm": 0.2499564251133014, "learning_rate": 5.981425814173263e-06, "loss": 0.2764, "step": 8051 }, { "epoch": 0.6427716133152391, "grad_norm": 0.28580868836534856, "learning_rate": 5.9790584368104865e-06, "loss": 0.1718, "step": 8052 }, { "epoch": 0.6428514408876826, "grad_norm": 0.2660528612917578, "learning_rate": 5.976691328231743e-06, "loss": 0.1657, "step": 8053 }, { "epoch": 0.6429312684601262, "grad_norm": 0.30897754729482024, "learning_rate": 5.974324488595258e-06, "loss": 0.1794, "step": 8054 }, { "epoch": 0.6430110960325697, "grad_norm": 0.34290174248648536, "learning_rate": 5.971957918059255e-06, "loss": 0.1463, "step": 8055 }, { "epoch": 0.6430909236050132, "grad_norm": 0.2967299447204013, "learning_rate": 5.969591616781919e-06, "loss": 0.1459, "step": 8056 }, { "epoch": 0.6431707511774567, "grad_norm": 0.30211097711824847, "learning_rate": 5.967225584921439e-06, "loss": 0.1951, "step": 8057 }, { "epoch": 0.6432505787499002, "grad_norm": 0.3272060661646029, "learning_rate": 5.964859822635969e-06, "loss": 0.1752, "step": 8058 }, { "epoch": 0.6433304063223437, "grad_norm": 0.3065614744680589, "learning_rate": 5.962494330083647e-06, "loss": 0.1985, "step": 8059 }, { "epoch": 0.6434102338947872, "grad_norm": 0.3704886480344751, "learning_rate": 5.960129107422603e-06, "loss": 0.1665, "step": 8060 }, { "epoch": 0.6434900614672308, "grad_norm": 0.27403977178670524, "learning_rate": 5.95776415481094e-06, "loss": 0.1548, "step": 8061 }, { "epoch": 0.6435698890396743, "grad_norm": 0.3299378781696817, "learning_rate": 5.955399472406747e-06, "loss": 0.1224, "step": 8062 }, { "epoch": 0.6436497166121178, "grad_norm": 0.25615931954118953, "learning_rate": 5.953035060368093e-06, "loss": 0.2077, "step": 8063 }, { "epoch": 0.6437295441845613, "grad_norm": 0.275036000609862, "learning_rate": 5.95067091885303e-06, "loss": 0.1512, "step": 8064 }, { "epoch": 0.6438093717570049, "grad_norm": 0.2803837038231813, "learning_rate": 5.948307048019592e-06, "loss": 0.1806, "step": 8065 }, { "epoch": 0.6438891993294484, "grad_norm": 0.2827153760537518, "learning_rate": 5.945943448025791e-06, "loss": 0.1745, "step": 8066 }, { "epoch": 0.643969026901892, "grad_norm": 0.28093216122350045, "learning_rate": 5.94358011902963e-06, "loss": 0.1361, "step": 8067 }, { "epoch": 0.6440488544743355, "grad_norm": 0.27485667887756526, "learning_rate": 5.941217061189082e-06, "loss": 0.1328, "step": 8068 }, { "epoch": 0.644128682046779, "grad_norm": 0.2786297830227066, "learning_rate": 5.938854274662114e-06, "loss": 0.1586, "step": 8069 }, { "epoch": 0.6442085096192225, "grad_norm": 0.3084150611121719, "learning_rate": 5.936491759606662e-06, "loss": 0.2015, "step": 8070 }, { "epoch": 0.644288337191666, "grad_norm": 0.3793037506412601, "learning_rate": 5.934129516180659e-06, "loss": 0.1788, "step": 8071 }, { "epoch": 0.6443681647641095, "grad_norm": 0.2819058561203337, "learning_rate": 5.931767544542007e-06, "loss": 0.1463, "step": 8072 }, { "epoch": 0.644447992336553, "grad_norm": 0.32912311263279465, "learning_rate": 5.929405844848592e-06, "loss": 0.159, "step": 8073 }, { "epoch": 0.6445278199089965, "grad_norm": 0.28064791991814336, "learning_rate": 5.92704441725829e-06, "loss": 0.1661, "step": 8074 }, { "epoch": 0.6446076474814401, "grad_norm": 0.27297325779444015, "learning_rate": 5.924683261928948e-06, "loss": 0.1648, "step": 8075 }, { "epoch": 0.6446874750538836, "grad_norm": 0.2649243289399622, "learning_rate": 5.922322379018405e-06, "loss": 0.1494, "step": 8076 }, { "epoch": 0.6447673026263271, "grad_norm": 0.28670818556691957, "learning_rate": 5.9199617686844715e-06, "loss": 0.1568, "step": 8077 }, { "epoch": 0.6448471301987706, "grad_norm": 0.25099585909032285, "learning_rate": 5.9176014310849495e-06, "loss": 0.1494, "step": 8078 }, { "epoch": 0.6449269577712142, "grad_norm": 0.29731884227534544, "learning_rate": 5.915241366377613e-06, "loss": 0.1793, "step": 8079 }, { "epoch": 0.6450067853436577, "grad_norm": 0.28020835215395884, "learning_rate": 5.912881574720229e-06, "loss": 0.1864, "step": 8080 }, { "epoch": 0.6450866129161013, "grad_norm": 0.32436273802309673, "learning_rate": 5.910522056270539e-06, "loss": 0.2111, "step": 8081 }, { "epoch": 0.6451664404885448, "grad_norm": 0.2873482359449141, "learning_rate": 5.9081628111862595e-06, "loss": 0.1756, "step": 8082 }, { "epoch": 0.6452462680609883, "grad_norm": 0.3708629355322537, "learning_rate": 5.905803839625107e-06, "loss": 0.1529, "step": 8083 }, { "epoch": 0.6453260956334318, "grad_norm": 0.3502528858841469, "learning_rate": 5.903445141744763e-06, "loss": 0.1362, "step": 8084 }, { "epoch": 0.6454059232058753, "grad_norm": 0.30598964338925855, "learning_rate": 5.901086717702903e-06, "loss": 0.1754, "step": 8085 }, { "epoch": 0.6454857507783188, "grad_norm": 0.3020906938787636, "learning_rate": 5.898728567657171e-06, "loss": 0.1876, "step": 8086 }, { "epoch": 0.6455655783507623, "grad_norm": 0.29373452177197, "learning_rate": 5.8963706917652075e-06, "loss": 0.1716, "step": 8087 }, { "epoch": 0.6456454059232059, "grad_norm": 0.3029733269235708, "learning_rate": 5.894013090184625e-06, "loss": 0.1939, "step": 8088 }, { "epoch": 0.6457252334956494, "grad_norm": 0.3364933496799368, "learning_rate": 5.891655763073012e-06, "loss": 0.1931, "step": 8089 }, { "epoch": 0.6458050610680929, "grad_norm": 0.30538891332237506, "learning_rate": 5.889298710587958e-06, "loss": 0.1765, "step": 8090 }, { "epoch": 0.6458848886405364, "grad_norm": 0.29242646605730704, "learning_rate": 5.886941932887012e-06, "loss": 0.1697, "step": 8091 }, { "epoch": 0.64596471621298, "grad_norm": 0.3163161512061886, "learning_rate": 5.884585430127727e-06, "loss": 0.1267, "step": 8092 }, { "epoch": 0.6460445437854235, "grad_norm": 0.30187300507214204, "learning_rate": 5.882229202467613e-06, "loss": 0.1273, "step": 8093 }, { "epoch": 0.646124371357867, "grad_norm": 0.2979923389854453, "learning_rate": 5.879873250064188e-06, "loss": 0.1744, "step": 8094 }, { "epoch": 0.6462041989303106, "grad_norm": 0.29898514675826804, "learning_rate": 5.877517573074928e-06, "loss": 0.1774, "step": 8095 }, { "epoch": 0.6462840265027541, "grad_norm": 0.3045891814167646, "learning_rate": 5.875162171657303e-06, "loss": 0.2048, "step": 8096 }, { "epoch": 0.6463638540751976, "grad_norm": 0.2833135497371059, "learning_rate": 5.872807045968765e-06, "loss": 0.1538, "step": 8097 }, { "epoch": 0.6464436816476411, "grad_norm": 0.2559870191996148, "learning_rate": 5.870452196166739e-06, "loss": 0.1266, "step": 8098 }, { "epoch": 0.6465235092200846, "grad_norm": 0.283911206244436, "learning_rate": 5.868097622408645e-06, "loss": 0.1897, "step": 8099 }, { "epoch": 0.6466033367925281, "grad_norm": 0.2730983370335907, "learning_rate": 5.865743324851869e-06, "loss": 0.193, "step": 8100 }, { "epoch": 0.6466831643649716, "grad_norm": 0.321688000737912, "learning_rate": 5.863389303653797e-06, "loss": 0.1909, "step": 8101 }, { "epoch": 0.6467629919374152, "grad_norm": 0.3314663386139955, "learning_rate": 5.861035558971777e-06, "loss": 0.1922, "step": 8102 }, { "epoch": 0.6468428195098587, "grad_norm": 0.31171508606460946, "learning_rate": 5.858682090963148e-06, "loss": 0.1198, "step": 8103 }, { "epoch": 0.6469226470823022, "grad_norm": 0.2784467812725199, "learning_rate": 5.856328899785235e-06, "loss": 0.1615, "step": 8104 }, { "epoch": 0.6470024746547457, "grad_norm": 0.2819879526435919, "learning_rate": 5.853975985595335e-06, "loss": 0.169, "step": 8105 }, { "epoch": 0.6470823022271893, "grad_norm": 0.30023046861747055, "learning_rate": 5.8516233485507345e-06, "loss": 0.1492, "step": 8106 }, { "epoch": 0.6471621297996328, "grad_norm": 0.35873536257030686, "learning_rate": 5.849270988808695e-06, "loss": 0.2125, "step": 8107 }, { "epoch": 0.6472419573720763, "grad_norm": 0.2976540789873977, "learning_rate": 5.8469189065264645e-06, "loss": 0.1407, "step": 8108 }, { "epoch": 0.6473217849445199, "grad_norm": 0.26228978992893853, "learning_rate": 5.844567101861269e-06, "loss": 0.1678, "step": 8109 }, { "epoch": 0.6474016125169634, "grad_norm": 0.27619639265605495, "learning_rate": 5.842215574970318e-06, "loss": 0.1453, "step": 8110 }, { "epoch": 0.6474814400894069, "grad_norm": 0.30881935867537436, "learning_rate": 5.839864326010803e-06, "loss": 0.1157, "step": 8111 }, { "epoch": 0.6475612676618504, "grad_norm": 0.2677408466783681, "learning_rate": 5.8375133551398924e-06, "loss": 0.1874, "step": 8112 }, { "epoch": 0.6476410952342939, "grad_norm": 0.25228750737358635, "learning_rate": 5.8351626625147435e-06, "loss": 0.1831, "step": 8113 }, { "epoch": 0.6477209228067374, "grad_norm": 0.2850289880785207, "learning_rate": 5.8328122482924845e-06, "loss": 0.1284, "step": 8114 }, { "epoch": 0.647800750379181, "grad_norm": 0.2557918667548753, "learning_rate": 5.830462112630242e-06, "loss": 0.1806, "step": 8115 }, { "epoch": 0.6478805779516245, "grad_norm": 0.3018016410102133, "learning_rate": 5.828112255685101e-06, "loss": 0.1618, "step": 8116 }, { "epoch": 0.647960405524068, "grad_norm": 0.26796112295898755, "learning_rate": 5.825762677614148e-06, "loss": 0.1635, "step": 8117 }, { "epoch": 0.6480402330965115, "grad_norm": 0.313427905426042, "learning_rate": 5.823413378574441e-06, "loss": 0.1396, "step": 8118 }, { "epoch": 0.6481200606689551, "grad_norm": 0.2712241177089335, "learning_rate": 5.821064358723021e-06, "loss": 0.1841, "step": 8119 }, { "epoch": 0.6481998882413986, "grad_norm": 0.30621499651442646, "learning_rate": 5.818715618216913e-06, "loss": 0.1868, "step": 8120 }, { "epoch": 0.6482797158138421, "grad_norm": 0.2819880914735704, "learning_rate": 5.816367157213112e-06, "loss": 0.147, "step": 8121 }, { "epoch": 0.6483595433862857, "grad_norm": 0.2832680148897792, "learning_rate": 5.814018975868615e-06, "loss": 0.1523, "step": 8122 }, { "epoch": 0.6484393709587292, "grad_norm": 0.2626786004493915, "learning_rate": 5.811671074340379e-06, "loss": 0.2122, "step": 8123 }, { "epoch": 0.6485191985311727, "grad_norm": 0.2606540002716854, "learning_rate": 5.809323452785367e-06, "loss": 0.1618, "step": 8124 }, { "epoch": 0.6485990261036162, "grad_norm": 0.2827777202846254, "learning_rate": 5.806976111360492e-06, "loss": 0.141, "step": 8125 }, { "epoch": 0.6486788536760597, "grad_norm": 0.24978179176530624, "learning_rate": 5.804629050222667e-06, "loss": 0.1337, "step": 8126 }, { "epoch": 0.6487586812485032, "grad_norm": 0.3240994852844619, "learning_rate": 5.80228226952879e-06, "loss": 0.1589, "step": 8127 }, { "epoch": 0.6488385088209467, "grad_norm": 0.27278718770546145, "learning_rate": 5.799935769435733e-06, "loss": 0.1534, "step": 8128 }, { "epoch": 0.6489183363933902, "grad_norm": 0.29322924518407456, "learning_rate": 5.797589550100348e-06, "loss": 0.1404, "step": 8129 }, { "epoch": 0.6489981639658338, "grad_norm": 0.2513867065194426, "learning_rate": 5.795243611679466e-06, "loss": 0.1684, "step": 8130 }, { "epoch": 0.6490779915382773, "grad_norm": 0.3165288353550733, "learning_rate": 5.792897954329913e-06, "loss": 0.1595, "step": 8131 }, { "epoch": 0.6491578191107208, "grad_norm": 0.30394095199864235, "learning_rate": 5.7905525782084826e-06, "loss": 0.1413, "step": 8132 }, { "epoch": 0.6492376466831644, "grad_norm": 0.30726025792638295, "learning_rate": 5.788207483471954e-06, "loss": 0.1371, "step": 8133 }, { "epoch": 0.6493174742556079, "grad_norm": 0.25297564354947233, "learning_rate": 5.785862670277087e-06, "loss": 0.1793, "step": 8134 }, { "epoch": 0.6493973018280514, "grad_norm": 0.3422217826344491, "learning_rate": 5.78351813878062e-06, "loss": 0.196, "step": 8135 }, { "epoch": 0.649477129400495, "grad_norm": 0.29621342556869545, "learning_rate": 5.781173889139281e-06, "loss": 0.1371, "step": 8136 }, { "epoch": 0.6495569569729385, "grad_norm": 0.3560174872086391, "learning_rate": 5.778829921509773e-06, "loss": 0.1681, "step": 8137 }, { "epoch": 0.649636784545382, "grad_norm": 0.29027830454741765, "learning_rate": 5.776486236048779e-06, "loss": 0.1322, "step": 8138 }, { "epoch": 0.6497166121178255, "grad_norm": 0.317003475058968, "learning_rate": 5.7741428329129675e-06, "loss": 0.1955, "step": 8139 }, { "epoch": 0.649796439690269, "grad_norm": 0.2850499032152026, "learning_rate": 5.77179971225898e-06, "loss": 0.1957, "step": 8140 }, { "epoch": 0.6498762672627125, "grad_norm": 0.28237833356299635, "learning_rate": 5.769456874243453e-06, "loss": 0.1924, "step": 8141 }, { "epoch": 0.649956094835156, "grad_norm": 0.28069887312616487, "learning_rate": 5.76711431902299e-06, "loss": 0.2054, "step": 8142 }, { "epoch": 0.6500359224075996, "grad_norm": 0.27474606487968223, "learning_rate": 5.764772046754185e-06, "loss": 0.1494, "step": 8143 }, { "epoch": 0.6501157499800431, "grad_norm": 0.3158585270922086, "learning_rate": 5.762430057593604e-06, "loss": 0.1632, "step": 8144 }, { "epoch": 0.6501955775524866, "grad_norm": 0.2728340185665379, "learning_rate": 5.760088351697808e-06, "loss": 0.1445, "step": 8145 }, { "epoch": 0.6502754051249302, "grad_norm": 0.2887295630174002, "learning_rate": 5.757746929223326e-06, "loss": 0.1811, "step": 8146 }, { "epoch": 0.6503552326973737, "grad_norm": 0.2774081183866286, "learning_rate": 5.7554057903266735e-06, "loss": 0.1941, "step": 8147 }, { "epoch": 0.6504350602698172, "grad_norm": 0.3277310593825686, "learning_rate": 5.753064935164348e-06, "loss": 0.1932, "step": 8148 }, { "epoch": 0.6505148878422607, "grad_norm": 0.28202201563228096, "learning_rate": 5.750724363892818e-06, "loss": 0.1938, "step": 8149 }, { "epoch": 0.6505947154147043, "grad_norm": 0.2827696158417258, "learning_rate": 5.748384076668557e-06, "loss": 0.1534, "step": 8150 }, { "epoch": 0.6506745429871478, "grad_norm": 0.3257304997449594, "learning_rate": 5.746044073647988e-06, "loss": 0.1596, "step": 8151 }, { "epoch": 0.6507543705595913, "grad_norm": 0.3210559791092669, "learning_rate": 5.7437043549875405e-06, "loss": 0.1673, "step": 8152 }, { "epoch": 0.6508341981320348, "grad_norm": 0.26869776880752105, "learning_rate": 5.74136492084361e-06, "loss": 0.1599, "step": 8153 }, { "epoch": 0.6509140257044783, "grad_norm": 0.3006783413940038, "learning_rate": 5.7390257713725836e-06, "loss": 0.1599, "step": 8154 }, { "epoch": 0.6509938532769218, "grad_norm": 0.27993119228829016, "learning_rate": 5.736686906730829e-06, "loss": 0.1661, "step": 8155 }, { "epoch": 0.6510736808493653, "grad_norm": 0.3072381372983065, "learning_rate": 5.734348327074673e-06, "loss": 0.1712, "step": 8156 }, { "epoch": 0.6511535084218089, "grad_norm": 0.25705131294257044, "learning_rate": 5.732010032560453e-06, "loss": 0.1852, "step": 8157 }, { "epoch": 0.6512333359942524, "grad_norm": 0.29214865546745444, "learning_rate": 5.72967202334447e-06, "loss": 0.1679, "step": 8158 }, { "epoch": 0.6513131635666959, "grad_norm": 0.25114184331733297, "learning_rate": 5.727334299583021e-06, "loss": 0.147, "step": 8159 }, { "epoch": 0.6513929911391395, "grad_norm": 0.3062670442827855, "learning_rate": 5.724996861432357e-06, "loss": 0.1503, "step": 8160 }, { "epoch": 0.651472818711583, "grad_norm": 0.28617956549950907, "learning_rate": 5.722659709048739e-06, "loss": 0.1868, "step": 8161 }, { "epoch": 0.6515526462840265, "grad_norm": 0.30208926111096174, "learning_rate": 5.72032284258839e-06, "loss": 0.1799, "step": 8162 }, { "epoch": 0.65163247385647, "grad_norm": 0.25805809961286175, "learning_rate": 5.717986262207525e-06, "loss": 0.1892, "step": 8163 }, { "epoch": 0.6517123014289136, "grad_norm": 0.30130650406340675, "learning_rate": 5.715649968062332e-06, "loss": 0.1453, "step": 8164 }, { "epoch": 0.6517921290013571, "grad_norm": 0.24585382913000148, "learning_rate": 5.71331396030898e-06, "loss": 0.1894, "step": 8165 }, { "epoch": 0.6518719565738006, "grad_norm": 0.285645036108227, "learning_rate": 5.710978239103628e-06, "loss": 0.137, "step": 8166 }, { "epoch": 0.6519517841462441, "grad_norm": 0.3178324498708377, "learning_rate": 5.708642804602409e-06, "loss": 0.1455, "step": 8167 }, { "epoch": 0.6520316117186876, "grad_norm": 0.2828379055463508, "learning_rate": 5.7063076569614355e-06, "loss": 0.1602, "step": 8168 }, { "epoch": 0.6521114392911311, "grad_norm": 0.32795164904515045, "learning_rate": 5.703972796336803e-06, "loss": 0.1862, "step": 8169 }, { "epoch": 0.6521912668635746, "grad_norm": 0.2862787829127564, "learning_rate": 5.7016382228845825e-06, "loss": 0.165, "step": 8170 }, { "epoch": 0.6522710944360182, "grad_norm": 0.3387712138029007, "learning_rate": 5.699303936760843e-06, "loss": 0.1936, "step": 8171 }, { "epoch": 0.6523509220084617, "grad_norm": 0.2921424567023751, "learning_rate": 5.696969938121613e-06, "loss": 0.179, "step": 8172 }, { "epoch": 0.6524307495809052, "grad_norm": 0.2821166048828726, "learning_rate": 5.694636227122915e-06, "loss": 0.1489, "step": 8173 }, { "epoch": 0.6525105771533488, "grad_norm": 0.3137973914349291, "learning_rate": 5.692302803920743e-06, "loss": 0.1519, "step": 8174 }, { "epoch": 0.6525904047257923, "grad_norm": 0.3055400719961571, "learning_rate": 5.689969668671085e-06, "loss": 0.1488, "step": 8175 }, { "epoch": 0.6526702322982358, "grad_norm": 0.31159531675078056, "learning_rate": 5.687636821529898e-06, "loss": 0.1759, "step": 8176 }, { "epoch": 0.6527500598706794, "grad_norm": 0.3323621992416121, "learning_rate": 5.685304262653124e-06, "loss": 0.2053, "step": 8177 }, { "epoch": 0.6528298874431229, "grad_norm": 0.30206206113504536, "learning_rate": 5.682971992196684e-06, "loss": 0.1856, "step": 8178 }, { "epoch": 0.6529097150155664, "grad_norm": 0.27997651276445185, "learning_rate": 5.680640010316479e-06, "loss": 0.1512, "step": 8179 }, { "epoch": 0.6529895425880099, "grad_norm": 0.27143179860544003, "learning_rate": 5.678308317168399e-06, "loss": 0.1397, "step": 8180 }, { "epoch": 0.6530693701604534, "grad_norm": 0.2880677466423468, "learning_rate": 5.675976912908308e-06, "loss": 0.1574, "step": 8181 }, { "epoch": 0.6531491977328969, "grad_norm": 0.3139275774094517, "learning_rate": 5.673645797692045e-06, "loss": 0.1485, "step": 8182 }, { "epoch": 0.6532290253053404, "grad_norm": 0.26703272302708264, "learning_rate": 5.671314971675436e-06, "loss": 0.1856, "step": 8183 }, { "epoch": 0.653308852877784, "grad_norm": 0.32388191427167473, "learning_rate": 5.668984435014295e-06, "loss": 0.1485, "step": 8184 }, { "epoch": 0.6533886804502275, "grad_norm": 0.2696753625831702, "learning_rate": 5.666654187864406e-06, "loss": 0.137, "step": 8185 }, { "epoch": 0.653468508022671, "grad_norm": 0.28326848433223706, "learning_rate": 5.664324230381535e-06, "loss": 0.1503, "step": 8186 }, { "epoch": 0.6535483355951146, "grad_norm": 0.2681484932668277, "learning_rate": 5.66199456272143e-06, "loss": 0.1937, "step": 8187 }, { "epoch": 0.6536281631675581, "grad_norm": 0.30767416123840813, "learning_rate": 5.659665185039818e-06, "loss": 0.1502, "step": 8188 }, { "epoch": 0.6537079907400016, "grad_norm": 0.28445942701716287, "learning_rate": 5.657336097492417e-06, "loss": 0.1254, "step": 8189 }, { "epoch": 0.6537878183124451, "grad_norm": 0.32441786533695005, "learning_rate": 5.655007300234909e-06, "loss": 0.1658, "step": 8190 }, { "epoch": 0.6538676458848887, "grad_norm": 0.2930123011233628, "learning_rate": 5.65267879342297e-06, "loss": 0.209, "step": 8191 }, { "epoch": 0.6539474734573322, "grad_norm": 0.2540262191934028, "learning_rate": 5.65035057721225e-06, "loss": 0.1736, "step": 8192 }, { "epoch": 0.6540273010297757, "grad_norm": 0.3055015851333593, "learning_rate": 5.648022651758376e-06, "loss": 0.1634, "step": 8193 }, { "epoch": 0.6541071286022192, "grad_norm": 0.29118423237982305, "learning_rate": 5.645695017216973e-06, "loss": 0.1517, "step": 8194 }, { "epoch": 0.6541869561746627, "grad_norm": 0.2840913404079538, "learning_rate": 5.643367673743619e-06, "loss": 0.1287, "step": 8195 }, { "epoch": 0.6542667837471062, "grad_norm": 0.28185336248352677, "learning_rate": 5.641040621493897e-06, "loss": 0.186, "step": 8196 }, { "epoch": 0.6543466113195497, "grad_norm": 0.28897445598875127, "learning_rate": 5.638713860623357e-06, "loss": 0.1512, "step": 8197 }, { "epoch": 0.6544264388919933, "grad_norm": 0.2806614306030189, "learning_rate": 5.6363873912875385e-06, "loss": 0.1333, "step": 8198 }, { "epoch": 0.6545062664644368, "grad_norm": 0.3954466216081588, "learning_rate": 5.634061213641959e-06, "loss": 0.1471, "step": 8199 }, { "epoch": 0.6545860940368803, "grad_norm": 0.3154561141855195, "learning_rate": 5.631735327842103e-06, "loss": 0.1755, "step": 8200 }, { "epoch": 0.6546659216093239, "grad_norm": 0.34156454255628294, "learning_rate": 5.629409734043456e-06, "loss": 0.2041, "step": 8201 }, { "epoch": 0.6547457491817674, "grad_norm": 0.2747278010397103, "learning_rate": 5.6270844324014675e-06, "loss": 0.1617, "step": 8202 }, { "epoch": 0.6548255767542109, "grad_norm": 0.25392114039936037, "learning_rate": 5.624759423071588e-06, "loss": 0.1551, "step": 8203 }, { "epoch": 0.6549054043266545, "grad_norm": 0.25523695611736075, "learning_rate": 5.622434706209218e-06, "loss": 0.1946, "step": 8204 }, { "epoch": 0.654985231899098, "grad_norm": 0.24556731667303902, "learning_rate": 5.620110281969768e-06, "loss": 0.1735, "step": 8205 }, { "epoch": 0.6550650594715415, "grad_norm": 0.2962586892659307, "learning_rate": 5.6177861505086126e-06, "loss": 0.1499, "step": 8206 }, { "epoch": 0.655144887043985, "grad_norm": 0.3058089168126628, "learning_rate": 5.61546231198111e-06, "loss": 0.1585, "step": 8207 }, { "epoch": 0.6552247146164285, "grad_norm": 0.29734117898718143, "learning_rate": 5.613138766542602e-06, "loss": 0.1728, "step": 8208 }, { "epoch": 0.655304542188872, "grad_norm": 0.31525782818742665, "learning_rate": 5.610815514348402e-06, "loss": 0.1492, "step": 8209 }, { "epoch": 0.6553843697613155, "grad_norm": 0.294357235621256, "learning_rate": 5.60849255555382e-06, "loss": 0.1619, "step": 8210 }, { "epoch": 0.655464197333759, "grad_norm": 0.28504569060663487, "learning_rate": 5.606169890314129e-06, "loss": 0.1569, "step": 8211 }, { "epoch": 0.6555440249062026, "grad_norm": 0.29895739685368283, "learning_rate": 5.603847518784595e-06, "loss": 0.1328, "step": 8212 }, { "epoch": 0.6556238524786461, "grad_norm": 0.32715669799730257, "learning_rate": 5.601525441120452e-06, "loss": 0.136, "step": 8213 }, { "epoch": 0.6557036800510897, "grad_norm": 0.2696038137737255, "learning_rate": 5.59920365747693e-06, "loss": 0.1567, "step": 8214 }, { "epoch": 0.6557835076235332, "grad_norm": 0.272663941795461, "learning_rate": 5.596882168009229e-06, "loss": 0.1686, "step": 8215 }, { "epoch": 0.6558633351959767, "grad_norm": 0.2698037239510728, "learning_rate": 5.5945609728725296e-06, "loss": 0.2093, "step": 8216 }, { "epoch": 0.6559431627684202, "grad_norm": 0.3092273292960924, "learning_rate": 5.592240072221995e-06, "loss": 0.1461, "step": 8217 }, { "epoch": 0.6560229903408638, "grad_norm": 0.2993094450272611, "learning_rate": 5.589919466212763e-06, "loss": 0.1642, "step": 8218 }, { "epoch": 0.6561028179133073, "grad_norm": 0.26468839011057044, "learning_rate": 5.587599154999968e-06, "loss": 0.1444, "step": 8219 }, { "epoch": 0.6561826454857508, "grad_norm": 0.27541566679387564, "learning_rate": 5.585279138738708e-06, "loss": 0.1646, "step": 8220 }, { "epoch": 0.6562624730581943, "grad_norm": 0.2878815424896577, "learning_rate": 5.582959417584064e-06, "loss": 0.1518, "step": 8221 }, { "epoch": 0.6563423006306378, "grad_norm": 0.31943767320214994, "learning_rate": 5.5806399916911055e-06, "loss": 0.1967, "step": 8222 }, { "epoch": 0.6564221282030813, "grad_norm": 0.299485240222375, "learning_rate": 5.57832086121487e-06, "loss": 0.1724, "step": 8223 }, { "epoch": 0.6565019557755248, "grad_norm": 0.3244294137305399, "learning_rate": 5.5760020263103884e-06, "loss": 0.1371, "step": 8224 }, { "epoch": 0.6565817833479684, "grad_norm": 0.26086172896776494, "learning_rate": 5.573683487132666e-06, "loss": 0.1971, "step": 8225 }, { "epoch": 0.6566616109204119, "grad_norm": 0.3038729477032396, "learning_rate": 5.571365243836684e-06, "loss": 0.1729, "step": 8226 }, { "epoch": 0.6567414384928554, "grad_norm": 0.254210467246921, "learning_rate": 5.569047296577407e-06, "loss": 0.1537, "step": 8227 }, { "epoch": 0.656821266065299, "grad_norm": 0.31424916372951567, "learning_rate": 5.566729645509785e-06, "loss": 0.2224, "step": 8228 }, { "epoch": 0.6569010936377425, "grad_norm": 0.32134364890624495, "learning_rate": 5.564412290788744e-06, "loss": 0.1848, "step": 8229 }, { "epoch": 0.656980921210186, "grad_norm": 0.32089867976820047, "learning_rate": 5.562095232569187e-06, "loss": 0.1827, "step": 8230 }, { "epoch": 0.6570607487826295, "grad_norm": 0.2959728193341664, "learning_rate": 5.559778471006001e-06, "loss": 0.1302, "step": 8231 }, { "epoch": 0.6571405763550731, "grad_norm": 0.2930820609622254, "learning_rate": 5.557462006254048e-06, "loss": 0.1629, "step": 8232 }, { "epoch": 0.6572204039275166, "grad_norm": 0.2603343218876654, "learning_rate": 5.555145838468183e-06, "loss": 0.1781, "step": 8233 }, { "epoch": 0.6573002314999601, "grad_norm": 0.29204653598699876, "learning_rate": 5.552829967803231e-06, "loss": 0.1482, "step": 8234 }, { "epoch": 0.6573800590724036, "grad_norm": 0.3302255783376296, "learning_rate": 5.550514394413992e-06, "loss": 0.1895, "step": 8235 }, { "epoch": 0.6574598866448471, "grad_norm": 0.2921338863291584, "learning_rate": 5.548199118455261e-06, "loss": 0.2004, "step": 8236 }, { "epoch": 0.6575397142172906, "grad_norm": 0.2698037901953166, "learning_rate": 5.545884140081796e-06, "loss": 0.139, "step": 8237 }, { "epoch": 0.6576195417897341, "grad_norm": 0.29839011693810147, "learning_rate": 5.543569459448357e-06, "loss": 0.1495, "step": 8238 }, { "epoch": 0.6576993693621777, "grad_norm": 0.27082076735128635, "learning_rate": 5.541255076709655e-06, "loss": 0.1757, "step": 8239 }, { "epoch": 0.6577791969346212, "grad_norm": 0.27729215553274655, "learning_rate": 5.538940992020412e-06, "loss": 0.1682, "step": 8240 }, { "epoch": 0.6578590245070648, "grad_norm": 0.2747304837309228, "learning_rate": 5.536627205535304e-06, "loss": 0.1704, "step": 8241 }, { "epoch": 0.6579388520795083, "grad_norm": 0.3014904285662524, "learning_rate": 5.534313717409011e-06, "loss": 0.1081, "step": 8242 }, { "epoch": 0.6580186796519518, "grad_norm": 0.2609651890380167, "learning_rate": 5.5320005277961665e-06, "loss": 0.1461, "step": 8243 }, { "epoch": 0.6580985072243953, "grad_norm": 0.3375362490371874, "learning_rate": 5.529687636851409e-06, "loss": 0.1779, "step": 8244 }, { "epoch": 0.6581783347968388, "grad_norm": 0.37115202143492987, "learning_rate": 5.5273750447293414e-06, "loss": 0.143, "step": 8245 }, { "epoch": 0.6582581623692824, "grad_norm": 0.31082515811495226, "learning_rate": 5.525062751584549e-06, "loss": 0.1555, "step": 8246 }, { "epoch": 0.6583379899417259, "grad_norm": 0.28610587423177525, "learning_rate": 5.52275075757161e-06, "loss": 0.1536, "step": 8247 }, { "epoch": 0.6584178175141694, "grad_norm": 0.25853107801364855, "learning_rate": 5.520439062845057e-06, "loss": 0.1676, "step": 8248 }, { "epoch": 0.6584976450866129, "grad_norm": 0.25258451962526884, "learning_rate": 5.518127667559427e-06, "loss": 0.1696, "step": 8249 }, { "epoch": 0.6585774726590564, "grad_norm": 0.28042749154256713, "learning_rate": 5.515816571869225e-06, "loss": 0.1516, "step": 8250 }, { "epoch": 0.6586573002314999, "grad_norm": 0.3182053336534041, "learning_rate": 5.5135057759289445e-06, "loss": 0.1508, "step": 8251 }, { "epoch": 0.6587371278039434, "grad_norm": 0.2988301504695869, "learning_rate": 5.511195279893046e-06, "loss": 0.1579, "step": 8252 }, { "epoch": 0.658816955376387, "grad_norm": 0.27220284557176927, "learning_rate": 5.508885083915974e-06, "loss": 0.1482, "step": 8253 }, { "epoch": 0.6588967829488305, "grad_norm": 0.2668954190749492, "learning_rate": 5.506575188152165e-06, "loss": 0.196, "step": 8254 }, { "epoch": 0.6589766105212741, "grad_norm": 0.2879802735596659, "learning_rate": 5.5042655927560215e-06, "loss": 0.1582, "step": 8255 }, { "epoch": 0.6590564380937176, "grad_norm": 0.23689259520925857, "learning_rate": 5.501956297881933e-06, "loss": 0.162, "step": 8256 }, { "epoch": 0.6591362656661611, "grad_norm": 0.27810270057103004, "learning_rate": 5.499647303684261e-06, "loss": 0.1623, "step": 8257 }, { "epoch": 0.6592160932386046, "grad_norm": 0.24598891142098972, "learning_rate": 5.4973386103173595e-06, "loss": 0.2044, "step": 8258 }, { "epoch": 0.6592959208110482, "grad_norm": 0.37906877852518134, "learning_rate": 5.495030217935555e-06, "loss": 0.1644, "step": 8259 }, { "epoch": 0.6593757483834917, "grad_norm": 0.27928757444107594, "learning_rate": 5.4927221266931516e-06, "loss": 0.1475, "step": 8260 }, { "epoch": 0.6594555759559352, "grad_norm": 0.30393251550141603, "learning_rate": 5.490414336744436e-06, "loss": 0.1748, "step": 8261 }, { "epoch": 0.6595354035283787, "grad_norm": 0.2646424885789746, "learning_rate": 5.488106848243673e-06, "loss": 0.1602, "step": 8262 }, { "epoch": 0.6596152311008222, "grad_norm": 0.27787101459979974, "learning_rate": 5.485799661345115e-06, "loss": 0.1487, "step": 8263 }, { "epoch": 0.6596950586732657, "grad_norm": 0.2998670235592647, "learning_rate": 5.483492776202986e-06, "loss": 0.1779, "step": 8264 }, { "epoch": 0.6597748862457092, "grad_norm": 0.3083790885666669, "learning_rate": 5.48118619297149e-06, "loss": 0.1765, "step": 8265 }, { "epoch": 0.6598547138181527, "grad_norm": 0.2972151072308646, "learning_rate": 5.478879911804814e-06, "loss": 0.161, "step": 8266 }, { "epoch": 0.6599345413905963, "grad_norm": 0.29870692796206494, "learning_rate": 5.476573932857121e-06, "loss": 0.1799, "step": 8267 }, { "epoch": 0.6600143689630399, "grad_norm": 0.31620261661476107, "learning_rate": 5.474268256282562e-06, "loss": 0.1393, "step": 8268 }, { "epoch": 0.6600941965354834, "grad_norm": 0.23937687080158987, "learning_rate": 5.471962882235261e-06, "loss": 0.1698, "step": 8269 }, { "epoch": 0.6601740241079269, "grad_norm": 0.2910567361559908, "learning_rate": 5.469657810869322e-06, "loss": 0.187, "step": 8270 }, { "epoch": 0.6602538516803704, "grad_norm": 0.2770342480742745, "learning_rate": 5.467353042338825e-06, "loss": 0.144, "step": 8271 }, { "epoch": 0.6603336792528139, "grad_norm": 0.28738709365161946, "learning_rate": 5.465048576797844e-06, "loss": 0.2278, "step": 8272 }, { "epoch": 0.6604135068252575, "grad_norm": 0.2975500764041834, "learning_rate": 5.462744414400419e-06, "loss": 0.1369, "step": 8273 }, { "epoch": 0.660493334397701, "grad_norm": 0.29614760617014285, "learning_rate": 5.460440555300575e-06, "loss": 0.1298, "step": 8274 }, { "epoch": 0.6605731619701445, "grad_norm": 0.2661151618504114, "learning_rate": 5.458136999652314e-06, "loss": 0.1665, "step": 8275 }, { "epoch": 0.660652989542588, "grad_norm": 0.2997711985781746, "learning_rate": 5.455833747609616e-06, "loss": 0.1514, "step": 8276 }, { "epoch": 0.6607328171150315, "grad_norm": 0.26716458369658064, "learning_rate": 5.453530799326454e-06, "loss": 0.1269, "step": 8277 }, { "epoch": 0.660812644687475, "grad_norm": 0.36180438517034286, "learning_rate": 5.451228154956765e-06, "loss": 0.1762, "step": 8278 }, { "epoch": 0.6608924722599185, "grad_norm": 0.3139223201882, "learning_rate": 5.448925814654472e-06, "loss": 0.1252, "step": 8279 }, { "epoch": 0.660972299832362, "grad_norm": 0.38337329229697026, "learning_rate": 5.4466237785734745e-06, "loss": 0.1453, "step": 8280 }, { "epoch": 0.6610521274048056, "grad_norm": 0.3137585130105321, "learning_rate": 5.44432204686766e-06, "loss": 0.182, "step": 8281 }, { "epoch": 0.6611319549772492, "grad_norm": 0.2838231173943572, "learning_rate": 5.442020619690895e-06, "loss": 0.1895, "step": 8282 }, { "epoch": 0.6612117825496927, "grad_norm": 0.28908934446760404, "learning_rate": 5.4397194971970034e-06, "loss": 0.1494, "step": 8283 }, { "epoch": 0.6612916101221362, "grad_norm": 0.2646371414049741, "learning_rate": 5.437418679539821e-06, "loss": 0.1655, "step": 8284 }, { "epoch": 0.6613714376945797, "grad_norm": 0.29479327207436307, "learning_rate": 5.435118166873141e-06, "loss": 0.1711, "step": 8285 }, { "epoch": 0.6614512652670232, "grad_norm": 0.2768551922784218, "learning_rate": 5.432817959350753e-06, "loss": 0.18, "step": 8286 }, { "epoch": 0.6615310928394668, "grad_norm": 0.27173994891629727, "learning_rate": 5.430518057126402e-06, "loss": 0.1344, "step": 8287 }, { "epoch": 0.6616109204119103, "grad_norm": 0.2789628501600347, "learning_rate": 5.42821846035384e-06, "loss": 0.1945, "step": 8288 }, { "epoch": 0.6616907479843538, "grad_norm": 0.31314242712820595, "learning_rate": 5.425919169186782e-06, "loss": 0.166, "step": 8289 }, { "epoch": 0.6617705755567973, "grad_norm": 0.28854232322161877, "learning_rate": 5.4236201837789214e-06, "loss": 0.1862, "step": 8290 }, { "epoch": 0.6618504031292408, "grad_norm": 0.2735564107557879, "learning_rate": 5.421321504283949e-06, "loss": 0.1555, "step": 8291 }, { "epoch": 0.6619302307016843, "grad_norm": 0.3403157706137892, "learning_rate": 5.419023130855507e-06, "loss": 0.1706, "step": 8292 }, { "epoch": 0.6620100582741278, "grad_norm": 0.3055920196661985, "learning_rate": 5.4167250636472426e-06, "loss": 0.1595, "step": 8293 }, { "epoch": 0.6620898858465714, "grad_norm": 0.24774185522537573, "learning_rate": 5.414427302812766e-06, "loss": 0.1249, "step": 8294 }, { "epoch": 0.662169713419015, "grad_norm": 0.2975838908718217, "learning_rate": 5.412129848505685e-06, "loss": 0.1394, "step": 8295 }, { "epoch": 0.6622495409914585, "grad_norm": 0.3016132963689658, "learning_rate": 5.409832700879565e-06, "loss": 0.1522, "step": 8296 }, { "epoch": 0.662329368563902, "grad_norm": 0.2775188066191018, "learning_rate": 5.407535860087958e-06, "loss": 0.2035, "step": 8297 }, { "epoch": 0.6624091961363455, "grad_norm": 0.27748589679456287, "learning_rate": 5.405239326284408e-06, "loss": 0.1538, "step": 8298 }, { "epoch": 0.662489023708789, "grad_norm": 0.29006965836642573, "learning_rate": 5.402943099622427e-06, "loss": 0.1531, "step": 8299 }, { "epoch": 0.6625688512812326, "grad_norm": 0.2872909189768285, "learning_rate": 5.4006471802555075e-06, "loss": 0.1428, "step": 8300 }, { "epoch": 0.6626486788536761, "grad_norm": 0.277719937356848, "learning_rate": 5.398351568337118e-06, "loss": 0.2166, "step": 8301 }, { "epoch": 0.6627285064261196, "grad_norm": 0.34444242705734546, "learning_rate": 5.396056264020719e-06, "loss": 0.1504, "step": 8302 }, { "epoch": 0.6628083339985631, "grad_norm": 0.24827492737171578, "learning_rate": 5.393761267459739e-06, "loss": 0.1824, "step": 8303 }, { "epoch": 0.6628881615710066, "grad_norm": 0.26184104368648076, "learning_rate": 5.3914665788075905e-06, "loss": 0.1206, "step": 8304 }, { "epoch": 0.6629679891434501, "grad_norm": 0.2697672519826971, "learning_rate": 5.389172198217662e-06, "loss": 0.1368, "step": 8305 }, { "epoch": 0.6630478167158936, "grad_norm": 0.26152810024000944, "learning_rate": 5.386878125843322e-06, "loss": 0.1294, "step": 8306 }, { "epoch": 0.6631276442883371, "grad_norm": 0.27707841811376877, "learning_rate": 5.384584361837927e-06, "loss": 0.1531, "step": 8307 }, { "epoch": 0.6632074718607807, "grad_norm": 0.28833306657352187, "learning_rate": 5.382290906354801e-06, "loss": 0.1978, "step": 8308 }, { "epoch": 0.6632872994332243, "grad_norm": 0.28295052734061865, "learning_rate": 5.3799977595472555e-06, "loss": 0.1666, "step": 8309 }, { "epoch": 0.6633671270056678, "grad_norm": 0.28168687985332436, "learning_rate": 5.3777049215685714e-06, "loss": 0.1705, "step": 8310 }, { "epoch": 0.6634469545781113, "grad_norm": 0.3061114677218315, "learning_rate": 5.375412392572026e-06, "loss": 0.2084, "step": 8311 }, { "epoch": 0.6635267821505548, "grad_norm": 0.28004965737549736, "learning_rate": 5.37312017271086e-06, "loss": 0.1542, "step": 8312 }, { "epoch": 0.6636066097229983, "grad_norm": 0.2958231119903867, "learning_rate": 5.370828262138299e-06, "loss": 0.1556, "step": 8313 }, { "epoch": 0.6636864372954419, "grad_norm": 0.259249110048867, "learning_rate": 5.368536661007551e-06, "loss": 0.1313, "step": 8314 }, { "epoch": 0.6637662648678854, "grad_norm": 0.2750060032745378, "learning_rate": 5.366245369471793e-06, "loss": 0.1614, "step": 8315 }, { "epoch": 0.6638460924403289, "grad_norm": 0.3218604645343085, "learning_rate": 5.363954387684199e-06, "loss": 0.1783, "step": 8316 }, { "epoch": 0.6639259200127724, "grad_norm": 0.2555637386316008, "learning_rate": 5.361663715797907e-06, "loss": 0.1983, "step": 8317 }, { "epoch": 0.6640057475852159, "grad_norm": 0.29687938372742523, "learning_rate": 5.3593733539660395e-06, "loss": 0.1442, "step": 8318 }, { "epoch": 0.6640855751576594, "grad_norm": 0.2723313132191738, "learning_rate": 5.357083302341699e-06, "loss": 0.1342, "step": 8319 }, { "epoch": 0.6641654027301029, "grad_norm": 0.28331560677963336, "learning_rate": 5.354793561077962e-06, "loss": 0.1744, "step": 8320 }, { "epoch": 0.6642452303025465, "grad_norm": 0.28437821682496783, "learning_rate": 5.352504130327896e-06, "loss": 0.1711, "step": 8321 }, { "epoch": 0.6643250578749901, "grad_norm": 0.2873133160457572, "learning_rate": 5.350215010244537e-06, "loss": 0.1749, "step": 8322 }, { "epoch": 0.6644048854474336, "grad_norm": 0.34550358044165275, "learning_rate": 5.347926200980904e-06, "loss": 0.1806, "step": 8323 }, { "epoch": 0.6644847130198771, "grad_norm": 0.3161245938759937, "learning_rate": 5.3456377026899895e-06, "loss": 0.1492, "step": 8324 }, { "epoch": 0.6645645405923206, "grad_norm": 0.25927646776321916, "learning_rate": 5.343349515524779e-06, "loss": 0.1497, "step": 8325 }, { "epoch": 0.6646443681647641, "grad_norm": 0.28777751022070347, "learning_rate": 5.34106163963823e-06, "loss": 0.1928, "step": 8326 }, { "epoch": 0.6647241957372076, "grad_norm": 0.31616446223845124, "learning_rate": 5.338774075183264e-06, "loss": 0.1515, "step": 8327 }, { "epoch": 0.6648040233096512, "grad_norm": 0.25581673022162804, "learning_rate": 5.336486822312809e-06, "loss": 0.1538, "step": 8328 }, { "epoch": 0.6648838508820947, "grad_norm": 0.33232314909394545, "learning_rate": 5.33419988117975e-06, "loss": 0.1922, "step": 8329 }, { "epoch": 0.6649636784545382, "grad_norm": 0.2898104380934565, "learning_rate": 5.331913251936973e-06, "loss": 0.1315, "step": 8330 }, { "epoch": 0.6650435060269817, "grad_norm": 0.27925709230676155, "learning_rate": 5.329626934737313e-06, "loss": 0.1694, "step": 8331 }, { "epoch": 0.6651233335994252, "grad_norm": 0.2921844766404672, "learning_rate": 5.327340929733614e-06, "loss": 0.1498, "step": 8332 }, { "epoch": 0.6652031611718687, "grad_norm": 0.28706718774029044, "learning_rate": 5.325055237078682e-06, "loss": 0.1199, "step": 8333 }, { "epoch": 0.6652829887443122, "grad_norm": 0.23008952313819642, "learning_rate": 5.322769856925307e-06, "loss": 0.1588, "step": 8334 }, { "epoch": 0.6653628163167558, "grad_norm": 0.24151098392334006, "learning_rate": 5.3204847894262565e-06, "loss": 0.1308, "step": 8335 }, { "epoch": 0.6654426438891994, "grad_norm": 0.2973864988779204, "learning_rate": 5.318200034734277e-06, "loss": 0.1483, "step": 8336 }, { "epoch": 0.6655224714616429, "grad_norm": 0.276194622006673, "learning_rate": 5.3159155930021e-06, "loss": 0.1509, "step": 8337 }, { "epoch": 0.6656022990340864, "grad_norm": 0.3041863784435452, "learning_rate": 5.313631464382425e-06, "loss": 0.2236, "step": 8338 }, { "epoch": 0.6656821266065299, "grad_norm": 0.3631496939194048, "learning_rate": 5.311347649027948e-06, "loss": 0.1865, "step": 8339 }, { "epoch": 0.6657619541789734, "grad_norm": 0.301348946806285, "learning_rate": 5.309064147091319e-06, "loss": 0.1732, "step": 8340 }, { "epoch": 0.665841781751417, "grad_norm": 0.306434908664995, "learning_rate": 5.306780958725191e-06, "loss": 0.1027, "step": 8341 }, { "epoch": 0.6659216093238605, "grad_norm": 0.3023681346946055, "learning_rate": 5.304498084082182e-06, "loss": 0.1502, "step": 8342 }, { "epoch": 0.666001436896304, "grad_norm": 0.321973028834042, "learning_rate": 5.302215523314895e-06, "loss": 0.1684, "step": 8343 }, { "epoch": 0.6660812644687475, "grad_norm": 0.2767960759959787, "learning_rate": 5.299933276575909e-06, "loss": 0.1588, "step": 8344 }, { "epoch": 0.666161092041191, "grad_norm": 0.2976524870185914, "learning_rate": 5.297651344017778e-06, "loss": 0.156, "step": 8345 }, { "epoch": 0.6662409196136345, "grad_norm": 0.3295900491658204, "learning_rate": 5.295369725793051e-06, "loss": 0.1657, "step": 8346 }, { "epoch": 0.666320747186078, "grad_norm": 0.2925355399523587, "learning_rate": 5.29308842205424e-06, "loss": 0.138, "step": 8347 }, { "epoch": 0.6664005747585215, "grad_norm": 0.28621019701282685, "learning_rate": 5.29080743295384e-06, "loss": 0.1604, "step": 8348 }, { "epoch": 0.6664804023309651, "grad_norm": 0.30607308247679527, "learning_rate": 5.288526758644325e-06, "loss": 0.1382, "step": 8349 }, { "epoch": 0.6665602299034087, "grad_norm": 0.27492672909366217, "learning_rate": 5.28624639927815e-06, "loss": 0.1665, "step": 8350 }, { "epoch": 0.6666400574758522, "grad_norm": 0.29855362267733565, "learning_rate": 5.283966355007753e-06, "loss": 0.1644, "step": 8351 }, { "epoch": 0.6667198850482957, "grad_norm": 0.28474631563800784, "learning_rate": 5.28168662598554e-06, "loss": 0.132, "step": 8352 }, { "epoch": 0.6667997126207392, "grad_norm": 0.2885066501468154, "learning_rate": 5.279407212363903e-06, "loss": 0.1829, "step": 8353 }, { "epoch": 0.6668795401931827, "grad_norm": 0.27955419901432893, "learning_rate": 5.2771281142952104e-06, "loss": 0.1523, "step": 8354 }, { "epoch": 0.6669593677656263, "grad_norm": 0.28580827321168273, "learning_rate": 5.274849331931816e-06, "loss": 0.1586, "step": 8355 }, { "epoch": 0.6670391953380698, "grad_norm": 0.31742570284583377, "learning_rate": 5.2725708654260455e-06, "loss": 0.1547, "step": 8356 }, { "epoch": 0.6671190229105133, "grad_norm": 0.31371890401789576, "learning_rate": 5.270292714930203e-06, "loss": 0.199, "step": 8357 }, { "epoch": 0.6671988504829568, "grad_norm": 0.295712065465252, "learning_rate": 5.268014880596575e-06, "loss": 0.1443, "step": 8358 }, { "epoch": 0.6672786780554003, "grad_norm": 0.30294093864533744, "learning_rate": 5.265737362577421e-06, "loss": 0.1312, "step": 8359 }, { "epoch": 0.6673585056278438, "grad_norm": 0.2896621954274151, "learning_rate": 5.2634601610249946e-06, "loss": 0.1868, "step": 8360 }, { "epoch": 0.6674383332002873, "grad_norm": 0.3320775538686266, "learning_rate": 5.261183276091511e-06, "loss": 0.1761, "step": 8361 }, { "epoch": 0.6675181607727309, "grad_norm": 0.27128970361053356, "learning_rate": 5.258906707929172e-06, "loss": 0.1776, "step": 8362 }, { "epoch": 0.6675979883451745, "grad_norm": 0.2720113322735277, "learning_rate": 5.2566304566901575e-06, "loss": 0.1763, "step": 8363 }, { "epoch": 0.667677815917618, "grad_norm": 0.30089967399084655, "learning_rate": 5.254354522526622e-06, "loss": 0.1681, "step": 8364 }, { "epoch": 0.6677576434900615, "grad_norm": 0.2781387761146372, "learning_rate": 5.25207890559071e-06, "loss": 0.1982, "step": 8365 }, { "epoch": 0.667837471062505, "grad_norm": 0.2378210412915659, "learning_rate": 5.249803606034532e-06, "loss": 0.13, "step": 8366 }, { "epoch": 0.6679172986349485, "grad_norm": 0.2930426315832251, "learning_rate": 5.247528624010185e-06, "loss": 0.1831, "step": 8367 }, { "epoch": 0.667997126207392, "grad_norm": 0.2778333715107163, "learning_rate": 5.245253959669738e-06, "loss": 0.1863, "step": 8368 }, { "epoch": 0.6680769537798356, "grad_norm": 0.2632997820248786, "learning_rate": 5.242979613165252e-06, "loss": 0.1482, "step": 8369 }, { "epoch": 0.6681567813522791, "grad_norm": 0.2714831954582447, "learning_rate": 5.240705584648757e-06, "loss": 0.1626, "step": 8370 }, { "epoch": 0.6682366089247226, "grad_norm": 0.33668442862498466, "learning_rate": 5.2384318742722515e-06, "loss": 0.1545, "step": 8371 }, { "epoch": 0.6683164364971661, "grad_norm": 0.2928874603937353, "learning_rate": 5.236158482187735e-06, "loss": 0.1404, "step": 8372 }, { "epoch": 0.6683962640696096, "grad_norm": 0.2949311434687437, "learning_rate": 5.233885408547168e-06, "loss": 0.1603, "step": 8373 }, { "epoch": 0.6684760916420531, "grad_norm": 0.27089364603197774, "learning_rate": 5.231612653502507e-06, "loss": 0.1899, "step": 8374 }, { "epoch": 0.6685559192144966, "grad_norm": 0.26476839877187225, "learning_rate": 5.229340217205663e-06, "loss": 0.1729, "step": 8375 }, { "epoch": 0.6686357467869402, "grad_norm": 0.28492575821025934, "learning_rate": 5.227068099808548e-06, "loss": 0.1642, "step": 8376 }, { "epoch": 0.6687155743593838, "grad_norm": 0.27894472036240303, "learning_rate": 5.22479630146304e-06, "loss": 0.1543, "step": 8377 }, { "epoch": 0.6687954019318273, "grad_norm": 0.27363120817169245, "learning_rate": 5.222524822321009e-06, "loss": 0.1965, "step": 8378 }, { "epoch": 0.6688752295042708, "grad_norm": 0.2637698022548542, "learning_rate": 5.220253662534283e-06, "loss": 0.1325, "step": 8379 }, { "epoch": 0.6689550570767143, "grad_norm": 0.2897062148043181, "learning_rate": 5.2179828222546795e-06, "loss": 0.1678, "step": 8380 }, { "epoch": 0.6690348846491578, "grad_norm": 0.32160713614706504, "learning_rate": 5.215712301634005e-06, "loss": 0.1786, "step": 8381 }, { "epoch": 0.6691147122216013, "grad_norm": 0.2704436303919399, "learning_rate": 5.213442100824025e-06, "loss": 0.1326, "step": 8382 }, { "epoch": 0.6691945397940449, "grad_norm": 0.3072693493553268, "learning_rate": 5.211172219976506e-06, "loss": 0.1806, "step": 8383 }, { "epoch": 0.6692743673664884, "grad_norm": 0.2779986617697151, "learning_rate": 5.208902659243166e-06, "loss": 0.1579, "step": 8384 }, { "epoch": 0.6693541949389319, "grad_norm": 0.3115200950873383, "learning_rate": 5.2066334187757265e-06, "loss": 0.1069, "step": 8385 }, { "epoch": 0.6694340225113754, "grad_norm": 0.2527437218742381, "learning_rate": 5.204364498725871e-06, "loss": 0.1358, "step": 8386 }, { "epoch": 0.6695138500838189, "grad_norm": 0.3256954868251231, "learning_rate": 5.202095899245274e-06, "loss": 0.1505, "step": 8387 }, { "epoch": 0.6695936776562624, "grad_norm": 0.2819573804043021, "learning_rate": 5.199827620485576e-06, "loss": 0.1888, "step": 8388 }, { "epoch": 0.669673505228706, "grad_norm": 0.30171606851375177, "learning_rate": 5.197559662598404e-06, "loss": 0.1951, "step": 8389 }, { "epoch": 0.6697533328011496, "grad_norm": 0.31315968594504634, "learning_rate": 5.195292025735365e-06, "loss": 0.1744, "step": 8390 }, { "epoch": 0.6698331603735931, "grad_norm": 0.30039658130037017, "learning_rate": 5.1930247100480405e-06, "loss": 0.1865, "step": 8391 }, { "epoch": 0.6699129879460366, "grad_norm": 0.29483518281662907, "learning_rate": 5.190757715687991e-06, "loss": 0.2148, "step": 8392 }, { "epoch": 0.6699928155184801, "grad_norm": 0.30945787268736846, "learning_rate": 5.1884910428067555e-06, "loss": 0.152, "step": 8393 }, { "epoch": 0.6700726430909236, "grad_norm": 0.29511712604245005, "learning_rate": 5.1862246915558504e-06, "loss": 0.1561, "step": 8394 }, { "epoch": 0.6701524706633671, "grad_norm": 0.27091573795530344, "learning_rate": 5.183958662086777e-06, "loss": 0.1932, "step": 8395 }, { "epoch": 0.6702322982358107, "grad_norm": 0.3414130541660689, "learning_rate": 5.181692954551007e-06, "loss": 0.1948, "step": 8396 }, { "epoch": 0.6703121258082542, "grad_norm": 0.3608308008668163, "learning_rate": 5.179427569099996e-06, "loss": 0.1618, "step": 8397 }, { "epoch": 0.6703919533806977, "grad_norm": 0.2700347716385401, "learning_rate": 5.1771625058851715e-06, "loss": 0.2033, "step": 8398 }, { "epoch": 0.6704717809531412, "grad_norm": 0.2786511181957121, "learning_rate": 5.174897765057951e-06, "loss": 0.1404, "step": 8399 }, { "epoch": 0.6705516085255847, "grad_norm": 0.2975995434952899, "learning_rate": 5.17263334676972e-06, "loss": 0.1398, "step": 8400 }, { "epoch": 0.6706314360980282, "grad_norm": 0.290289109386978, "learning_rate": 5.170369251171845e-06, "loss": 0.1485, "step": 8401 }, { "epoch": 0.6707112636704717, "grad_norm": 0.2847784570673677, "learning_rate": 5.168105478415674e-06, "loss": 0.1532, "step": 8402 }, { "epoch": 0.6707910912429152, "grad_norm": 0.2934878494972379, "learning_rate": 5.1658420286525255e-06, "loss": 0.1648, "step": 8403 }, { "epoch": 0.6708709188153589, "grad_norm": 0.29150703194635214, "learning_rate": 5.16357890203371e-06, "loss": 0.1367, "step": 8404 }, { "epoch": 0.6709507463878024, "grad_norm": 0.29690210977236275, "learning_rate": 5.161316098710506e-06, "loss": 0.1572, "step": 8405 }, { "epoch": 0.6710305739602459, "grad_norm": 0.2578480744805547, "learning_rate": 5.159053618834171e-06, "loss": 0.1826, "step": 8406 }, { "epoch": 0.6711104015326894, "grad_norm": 0.2553785128673068, "learning_rate": 5.156791462555941e-06, "loss": 0.1711, "step": 8407 }, { "epoch": 0.6711902291051329, "grad_norm": 0.28984753426904214, "learning_rate": 5.154529630027039e-06, "loss": 0.1448, "step": 8408 }, { "epoch": 0.6712700566775764, "grad_norm": 0.2714290879130496, "learning_rate": 5.152268121398656e-06, "loss": 0.1577, "step": 8409 }, { "epoch": 0.67134988425002, "grad_norm": 0.24156356741941667, "learning_rate": 5.150006936821963e-06, "loss": 0.1512, "step": 8410 }, { "epoch": 0.6714297118224635, "grad_norm": 0.26565751448809616, "learning_rate": 5.147746076448113e-06, "loss": 0.1594, "step": 8411 }, { "epoch": 0.671509539394907, "grad_norm": 0.30601872034082095, "learning_rate": 5.1454855404282325e-06, "loss": 0.1693, "step": 8412 }, { "epoch": 0.6715893669673505, "grad_norm": 0.29440758723660754, "learning_rate": 5.143225328913436e-06, "loss": 0.1412, "step": 8413 }, { "epoch": 0.671669194539794, "grad_norm": 0.2859620166833253, "learning_rate": 5.140965442054806e-06, "loss": 0.1494, "step": 8414 }, { "epoch": 0.6717490221122375, "grad_norm": 0.2874744662725295, "learning_rate": 5.138705880003407e-06, "loss": 0.1793, "step": 8415 }, { "epoch": 0.671828849684681, "grad_norm": 0.3221818877565382, "learning_rate": 5.136446642910281e-06, "loss": 0.2292, "step": 8416 }, { "epoch": 0.6719086772571247, "grad_norm": 0.2824518375792101, "learning_rate": 5.134187730926447e-06, "loss": 0.1519, "step": 8417 }, { "epoch": 0.6719885048295682, "grad_norm": 0.2976838267704227, "learning_rate": 5.131929144202914e-06, "loss": 0.1528, "step": 8418 }, { "epoch": 0.6720683324020117, "grad_norm": 0.2804816039619176, "learning_rate": 5.129670882890645e-06, "loss": 0.1358, "step": 8419 }, { "epoch": 0.6721481599744552, "grad_norm": 0.25205272772949205, "learning_rate": 5.127412947140607e-06, "loss": 0.1771, "step": 8420 }, { "epoch": 0.6722279875468987, "grad_norm": 0.32020825877780806, "learning_rate": 5.125155337103727e-06, "loss": 0.1853, "step": 8421 }, { "epoch": 0.6723078151193422, "grad_norm": 0.2739569061326424, "learning_rate": 5.122898052930929e-06, "loss": 0.1539, "step": 8422 }, { "epoch": 0.6723876426917857, "grad_norm": 0.25945125351300047, "learning_rate": 5.120641094773092e-06, "loss": 0.1584, "step": 8423 }, { "epoch": 0.6724674702642293, "grad_norm": 0.3171448383996485, "learning_rate": 5.118384462781084e-06, "loss": 0.1836, "step": 8424 }, { "epoch": 0.6725472978366728, "grad_norm": 0.27031519437935814, "learning_rate": 5.11612815710576e-06, "loss": 0.1464, "step": 8425 }, { "epoch": 0.6726271254091163, "grad_norm": 0.3077468748177322, "learning_rate": 5.113872177897941e-06, "loss": 0.1769, "step": 8426 }, { "epoch": 0.6727069529815598, "grad_norm": 0.2730640813057041, "learning_rate": 5.111616525308429e-06, "loss": 0.1366, "step": 8427 }, { "epoch": 0.6727867805540033, "grad_norm": 0.28223102962662655, "learning_rate": 5.1093611994880054e-06, "loss": 0.1578, "step": 8428 }, { "epoch": 0.6728666081264468, "grad_norm": 0.29001684330246025, "learning_rate": 5.107106200587433e-06, "loss": 0.1789, "step": 8429 }, { "epoch": 0.6729464356988903, "grad_norm": 0.35006739663987074, "learning_rate": 5.1048515287574494e-06, "loss": 0.1599, "step": 8430 }, { "epoch": 0.673026263271334, "grad_norm": 0.29372876486221955, "learning_rate": 5.102597184148768e-06, "loss": 0.1774, "step": 8431 }, { "epoch": 0.6731060908437775, "grad_norm": 0.30489702492180976, "learning_rate": 5.100343166912085e-06, "loss": 0.1557, "step": 8432 }, { "epoch": 0.673185918416221, "grad_norm": 0.26808010544768124, "learning_rate": 5.098089477198066e-06, "loss": 0.1774, "step": 8433 }, { "epoch": 0.6732657459886645, "grad_norm": 0.2687764288863991, "learning_rate": 5.095836115157371e-06, "loss": 0.1618, "step": 8434 }, { "epoch": 0.673345573561108, "grad_norm": 0.2900012934465343, "learning_rate": 5.093583080940624e-06, "loss": 0.1574, "step": 8435 }, { "epoch": 0.6734254011335515, "grad_norm": 0.3019903110156827, "learning_rate": 5.091330374698431e-06, "loss": 0.1723, "step": 8436 }, { "epoch": 0.673505228705995, "grad_norm": 0.334740374524539, "learning_rate": 5.0890779965813775e-06, "loss": 0.1893, "step": 8437 }, { "epoch": 0.6735850562784386, "grad_norm": 0.27628958719273805, "learning_rate": 5.08682594674002e-06, "loss": 0.1454, "step": 8438 }, { "epoch": 0.6736648838508821, "grad_norm": 0.3130619372368617, "learning_rate": 5.0845742253249095e-06, "loss": 0.2082, "step": 8439 }, { "epoch": 0.6737447114233256, "grad_norm": 0.31530686068276875, "learning_rate": 5.082322832486559e-06, "loss": 0.1413, "step": 8440 }, { "epoch": 0.6738245389957691, "grad_norm": 0.2757384711957004, "learning_rate": 5.080071768375465e-06, "loss": 0.185, "step": 8441 }, { "epoch": 0.6739043665682126, "grad_norm": 0.3175441254712514, "learning_rate": 5.077821033142101e-06, "loss": 0.1606, "step": 8442 }, { "epoch": 0.6739841941406561, "grad_norm": 0.26283062747047276, "learning_rate": 5.075570626936925e-06, "loss": 0.1754, "step": 8443 }, { "epoch": 0.6740640217130998, "grad_norm": 0.30337508987490475, "learning_rate": 5.073320549910366e-06, "loss": 0.1921, "step": 8444 }, { "epoch": 0.6741438492855433, "grad_norm": 0.3481076063293013, "learning_rate": 5.071070802212828e-06, "loss": 0.1832, "step": 8445 }, { "epoch": 0.6742236768579868, "grad_norm": 0.27089194888984425, "learning_rate": 5.068821383994703e-06, "loss": 0.1314, "step": 8446 }, { "epoch": 0.6743035044304303, "grad_norm": 0.2868368133165905, "learning_rate": 5.06657229540635e-06, "loss": 0.1459, "step": 8447 }, { "epoch": 0.6743833320028738, "grad_norm": 0.28761260301640057, "learning_rate": 5.064323536598119e-06, "loss": 0.1495, "step": 8448 }, { "epoch": 0.6744631595753173, "grad_norm": 0.29362261410183976, "learning_rate": 5.062075107720327e-06, "loss": 0.1691, "step": 8449 }, { "epoch": 0.6745429871477608, "grad_norm": 0.3004513644614028, "learning_rate": 5.059827008923273e-06, "loss": 0.1589, "step": 8450 }, { "epoch": 0.6746228147202044, "grad_norm": 0.26634109943129985, "learning_rate": 5.057579240357229e-06, "loss": 0.1873, "step": 8451 }, { "epoch": 0.6747026422926479, "grad_norm": 0.29225275975874054, "learning_rate": 5.055331802172456e-06, "loss": 0.1729, "step": 8452 }, { "epoch": 0.6747824698650914, "grad_norm": 0.2972439130635696, "learning_rate": 5.053084694519189e-06, "loss": 0.1674, "step": 8453 }, { "epoch": 0.6748622974375349, "grad_norm": 0.3068830436272856, "learning_rate": 5.050837917547625e-06, "loss": 0.2023, "step": 8454 }, { "epoch": 0.6749421250099784, "grad_norm": 0.27840713371526027, "learning_rate": 5.0485914714079644e-06, "loss": 0.1387, "step": 8455 }, { "epoch": 0.6750219525824219, "grad_norm": 0.25747957181017134, "learning_rate": 5.046345356250366e-06, "loss": 0.1379, "step": 8456 }, { "epoch": 0.6751017801548654, "grad_norm": 0.30104869040690757, "learning_rate": 5.04409957222498e-06, "loss": 0.1642, "step": 8457 }, { "epoch": 0.6751816077273091, "grad_norm": 0.26601053155005067, "learning_rate": 5.0418541194819245e-06, "loss": 0.1397, "step": 8458 }, { "epoch": 0.6752614352997526, "grad_norm": 0.3335722109611142, "learning_rate": 5.0396089981713e-06, "loss": 0.1203, "step": 8459 }, { "epoch": 0.6753412628721961, "grad_norm": 0.3088911923132892, "learning_rate": 5.037364208443184e-06, "loss": 0.1369, "step": 8460 }, { "epoch": 0.6754210904446396, "grad_norm": 0.33216885378442657, "learning_rate": 5.035119750447628e-06, "loss": 0.204, "step": 8461 }, { "epoch": 0.6755009180170831, "grad_norm": 0.3457839939671633, "learning_rate": 5.032875624334674e-06, "loss": 0.1832, "step": 8462 }, { "epoch": 0.6755807455895266, "grad_norm": 0.3142313739242685, "learning_rate": 5.030631830254322e-06, "loss": 0.1526, "step": 8463 }, { "epoch": 0.6756605731619701, "grad_norm": 0.30472722343547454, "learning_rate": 5.02838836835657e-06, "loss": 0.2027, "step": 8464 }, { "epoch": 0.6757404007344137, "grad_norm": 0.2910794987640128, "learning_rate": 5.026145238791377e-06, "loss": 0.1664, "step": 8465 }, { "epoch": 0.6758202283068572, "grad_norm": 0.23584173498785832, "learning_rate": 5.023902441708698e-06, "loss": 0.1629, "step": 8466 }, { "epoch": 0.6759000558793007, "grad_norm": 0.2501959839926412, "learning_rate": 5.021659977258444e-06, "loss": 0.1352, "step": 8467 }, { "epoch": 0.6759798834517442, "grad_norm": 0.31287740473631365, "learning_rate": 5.019417845590517e-06, "loss": 0.187, "step": 8468 }, { "epoch": 0.6760597110241877, "grad_norm": 0.29719692961510596, "learning_rate": 5.017176046854799e-06, "loss": 0.2443, "step": 8469 }, { "epoch": 0.6761395385966312, "grad_norm": 0.2875597522509462, "learning_rate": 5.014934581201144e-06, "loss": 0.1474, "step": 8470 }, { "epoch": 0.6762193661690749, "grad_norm": 0.29691318412995027, "learning_rate": 5.012693448779384e-06, "loss": 0.1539, "step": 8471 }, { "epoch": 0.6762991937415184, "grad_norm": 0.31144002344109034, "learning_rate": 5.010452649739326e-06, "loss": 0.1952, "step": 8472 }, { "epoch": 0.6763790213139619, "grad_norm": 0.31694715237452303, "learning_rate": 5.008212184230767e-06, "loss": 0.137, "step": 8473 }, { "epoch": 0.6764588488864054, "grad_norm": 0.2778696053537931, "learning_rate": 5.005972052403465e-06, "loss": 0.1652, "step": 8474 }, { "epoch": 0.6765386764588489, "grad_norm": 0.30980563129532646, "learning_rate": 5.003732254407174e-06, "loss": 0.1597, "step": 8475 }, { "epoch": 0.6766185040312924, "grad_norm": 0.3459707365732748, "learning_rate": 5.001492790391607e-06, "loss": 0.1692, "step": 8476 }, { "epoch": 0.6766983316037359, "grad_norm": 0.27375784545189574, "learning_rate": 4.999253660506462e-06, "loss": 0.1905, "step": 8477 }, { "epoch": 0.6767781591761795, "grad_norm": 0.3524394169254864, "learning_rate": 4.997014864901423e-06, "loss": 0.17, "step": 8478 }, { "epoch": 0.676857986748623, "grad_norm": 0.28146800966382624, "learning_rate": 4.994776403726141e-06, "loss": 0.157, "step": 8479 }, { "epoch": 0.6769378143210665, "grad_norm": 0.2762234011431507, "learning_rate": 4.99253827713025e-06, "loss": 0.1616, "step": 8480 }, { "epoch": 0.67701764189351, "grad_norm": 0.2923860372443242, "learning_rate": 4.990300485263354e-06, "loss": 0.1844, "step": 8481 }, { "epoch": 0.6770974694659535, "grad_norm": 0.2957162866306635, "learning_rate": 4.988063028275048e-06, "loss": 0.1274, "step": 8482 }, { "epoch": 0.677177297038397, "grad_norm": 0.2705692983925578, "learning_rate": 4.985825906314896e-06, "loss": 0.1465, "step": 8483 }, { "epoch": 0.6772571246108405, "grad_norm": 0.3121422348370637, "learning_rate": 4.983589119532438e-06, "loss": 0.1691, "step": 8484 }, { "epoch": 0.6773369521832842, "grad_norm": 0.3223932897222155, "learning_rate": 4.981352668077195e-06, "loss": 0.1558, "step": 8485 }, { "epoch": 0.6774167797557277, "grad_norm": 0.27085657892946613, "learning_rate": 4.979116552098662e-06, "loss": 0.1516, "step": 8486 }, { "epoch": 0.6774966073281712, "grad_norm": 0.2800682188532181, "learning_rate": 4.976880771746322e-06, "loss": 0.2217, "step": 8487 }, { "epoch": 0.6775764349006147, "grad_norm": 0.37523465747076956, "learning_rate": 4.974645327169623e-06, "loss": 0.2081, "step": 8488 }, { "epoch": 0.6776562624730582, "grad_norm": 0.2686629692221819, "learning_rate": 4.972410218517996e-06, "loss": 0.1814, "step": 8489 }, { "epoch": 0.6777360900455017, "grad_norm": 0.28221925049203256, "learning_rate": 4.97017544594085e-06, "loss": 0.1358, "step": 8490 }, { "epoch": 0.6778159176179452, "grad_norm": 0.29657338587159715, "learning_rate": 4.967941009587567e-06, "loss": 0.1422, "step": 8491 }, { "epoch": 0.6778957451903888, "grad_norm": 0.31556529987212023, "learning_rate": 4.9657069096075176e-06, "loss": 0.1859, "step": 8492 }, { "epoch": 0.6779755727628323, "grad_norm": 0.2678855928829278, "learning_rate": 4.963473146150037e-06, "loss": 0.1338, "step": 8493 }, { "epoch": 0.6780554003352758, "grad_norm": 0.2861710559217196, "learning_rate": 4.961239719364446e-06, "loss": 0.1897, "step": 8494 }, { "epoch": 0.6781352279077193, "grad_norm": 0.2799046363891232, "learning_rate": 4.959006629400035e-06, "loss": 0.1492, "step": 8495 }, { "epoch": 0.6782150554801628, "grad_norm": 0.23492954335989968, "learning_rate": 4.956773876406086e-06, "loss": 0.1971, "step": 8496 }, { "epoch": 0.6782948830526063, "grad_norm": 0.2755783319363071, "learning_rate": 4.9545414605318475e-06, "loss": 0.1541, "step": 8497 }, { "epoch": 0.6783747106250498, "grad_norm": 0.33624344692941854, "learning_rate": 4.952309381926539e-06, "loss": 0.1272, "step": 8498 }, { "epoch": 0.6784545381974935, "grad_norm": 0.25265618141367213, "learning_rate": 4.9500776407393755e-06, "loss": 0.1553, "step": 8499 }, { "epoch": 0.678534365769937, "grad_norm": 0.2578569932052565, "learning_rate": 4.947846237119534e-06, "loss": 0.1442, "step": 8500 }, { "epoch": 0.6786141933423805, "grad_norm": 0.2517500442551135, "learning_rate": 4.945615171216186e-06, "loss": 0.1299, "step": 8501 }, { "epoch": 0.678694020914824, "grad_norm": 0.30019817836125456, "learning_rate": 4.9433844431784524e-06, "loss": 0.1486, "step": 8502 }, { "epoch": 0.6787738484872675, "grad_norm": 0.30064597042534624, "learning_rate": 4.941154053155462e-06, "loss": 0.1831, "step": 8503 }, { "epoch": 0.678853676059711, "grad_norm": 0.2894097434721529, "learning_rate": 4.938924001296304e-06, "loss": 0.1579, "step": 8504 }, { "epoch": 0.6789335036321545, "grad_norm": 0.31761082119304057, "learning_rate": 4.936694287750043e-06, "loss": 0.165, "step": 8505 }, { "epoch": 0.6790133312045981, "grad_norm": 0.32564683686000195, "learning_rate": 4.93446491266574e-06, "loss": 0.139, "step": 8506 }, { "epoch": 0.6790931587770416, "grad_norm": 0.41068468323048823, "learning_rate": 4.932235876192403e-06, "loss": 0.1633, "step": 8507 }, { "epoch": 0.6791729863494851, "grad_norm": 0.2821450518238556, "learning_rate": 4.930007178479045e-06, "loss": 0.1545, "step": 8508 }, { "epoch": 0.6792528139219286, "grad_norm": 0.2787588055951984, "learning_rate": 4.92777881967464e-06, "loss": 0.182, "step": 8509 }, { "epoch": 0.6793326414943721, "grad_norm": 0.3164319865113986, "learning_rate": 4.925550799928156e-06, "loss": 0.1642, "step": 8510 }, { "epoch": 0.6794124690668156, "grad_norm": 0.3058827290439189, "learning_rate": 4.923323119388511e-06, "loss": 0.164, "step": 8511 }, { "epoch": 0.6794922966392593, "grad_norm": 0.3346379309044635, "learning_rate": 4.9210957782046295e-06, "loss": 0.1508, "step": 8512 }, { "epoch": 0.6795721242117028, "grad_norm": 0.31015869637670296, "learning_rate": 4.918868776525395e-06, "loss": 0.136, "step": 8513 }, { "epoch": 0.6796519517841463, "grad_norm": 0.2494848697183122, "learning_rate": 4.9166421144996755e-06, "loss": 0.1496, "step": 8514 }, { "epoch": 0.6797317793565898, "grad_norm": 0.2721341328548602, "learning_rate": 4.914415792276313e-06, "loss": 0.1843, "step": 8515 }, { "epoch": 0.6798116069290333, "grad_norm": 0.3181430498447428, "learning_rate": 4.912189810004125e-06, "loss": 0.1596, "step": 8516 }, { "epoch": 0.6798914345014768, "grad_norm": 0.31707321364872004, "learning_rate": 4.909964167831917e-06, "loss": 0.1448, "step": 8517 }, { "epoch": 0.6799712620739203, "grad_norm": 0.26749894493924176, "learning_rate": 4.907738865908463e-06, "loss": 0.1681, "step": 8518 }, { "epoch": 0.6800510896463638, "grad_norm": 0.31892721510751526, "learning_rate": 4.905513904382512e-06, "loss": 0.146, "step": 8519 }, { "epoch": 0.6801309172188074, "grad_norm": 0.28317338263107356, "learning_rate": 4.903289283402796e-06, "loss": 0.1818, "step": 8520 }, { "epoch": 0.6802107447912509, "grad_norm": 0.30920316258542185, "learning_rate": 4.901065003118018e-06, "loss": 0.1453, "step": 8521 }, { "epoch": 0.6802905723636944, "grad_norm": 0.2863596838863958, "learning_rate": 4.898841063676868e-06, "loss": 0.2108, "step": 8522 }, { "epoch": 0.6803703999361379, "grad_norm": 0.32501525780958224, "learning_rate": 4.896617465228008e-06, "loss": 0.1593, "step": 8523 }, { "epoch": 0.6804502275085814, "grad_norm": 0.2761026494200194, "learning_rate": 4.8943942079200725e-06, "loss": 0.1365, "step": 8524 }, { "epoch": 0.6805300550810249, "grad_norm": 0.31160171101817935, "learning_rate": 4.892171291901675e-06, "loss": 0.1402, "step": 8525 }, { "epoch": 0.6806098826534686, "grad_norm": 0.25523229912419054, "learning_rate": 4.889948717321418e-06, "loss": 0.1515, "step": 8526 }, { "epoch": 0.6806897102259121, "grad_norm": 0.3787519392150843, "learning_rate": 4.887726484327866e-06, "loss": 0.1547, "step": 8527 }, { "epoch": 0.6807695377983556, "grad_norm": 0.3180425392484874, "learning_rate": 4.885504593069567e-06, "loss": 0.1748, "step": 8528 }, { "epoch": 0.6808493653707991, "grad_norm": 0.3176550718917791, "learning_rate": 4.8832830436950455e-06, "loss": 0.1823, "step": 8529 }, { "epoch": 0.6809291929432426, "grad_norm": 0.25640128159111975, "learning_rate": 4.881061836352799e-06, "loss": 0.1553, "step": 8530 }, { "epoch": 0.6810090205156861, "grad_norm": 0.29926757207956306, "learning_rate": 4.878840971191316e-06, "loss": 0.1822, "step": 8531 }, { "epoch": 0.6810888480881296, "grad_norm": 0.3139473868115578, "learning_rate": 4.8766204483590485e-06, "loss": 0.159, "step": 8532 }, { "epoch": 0.6811686756605732, "grad_norm": 0.29081403021041574, "learning_rate": 4.874400268004427e-06, "loss": 0.1769, "step": 8533 }, { "epoch": 0.6812485032330167, "grad_norm": 0.2967318275323894, "learning_rate": 4.8721804302758635e-06, "loss": 0.1404, "step": 8534 }, { "epoch": 0.6813283308054602, "grad_norm": 0.3222893795152424, "learning_rate": 4.8699609353217415e-06, "loss": 0.1401, "step": 8535 }, { "epoch": 0.6814081583779037, "grad_norm": 0.28709908834550946, "learning_rate": 4.867741783290434e-06, "loss": 0.1451, "step": 8536 }, { "epoch": 0.6814879859503472, "grad_norm": 0.2925155850431436, "learning_rate": 4.865522974330277e-06, "loss": 0.2016, "step": 8537 }, { "epoch": 0.6815678135227907, "grad_norm": 0.2729767339556213, "learning_rate": 4.86330450858959e-06, "loss": 0.2115, "step": 8538 }, { "epoch": 0.6816476410952343, "grad_norm": 0.276593242918354, "learning_rate": 4.8610863862166646e-06, "loss": 0.1556, "step": 8539 }, { "epoch": 0.6817274686676779, "grad_norm": 0.25470483663147603, "learning_rate": 4.858868607359781e-06, "loss": 0.1483, "step": 8540 }, { "epoch": 0.6818072962401214, "grad_norm": 0.278566068159168, "learning_rate": 4.8566511721671864e-06, "loss": 0.1522, "step": 8541 }, { "epoch": 0.6818871238125649, "grad_norm": 0.3617045289145491, "learning_rate": 4.854434080787106e-06, "loss": 0.1496, "step": 8542 }, { "epoch": 0.6819669513850084, "grad_norm": 0.3141044858273889, "learning_rate": 4.8522173333677436e-06, "loss": 0.1227, "step": 8543 }, { "epoch": 0.6820467789574519, "grad_norm": 0.3473815798685117, "learning_rate": 4.850000930057277e-06, "loss": 0.1412, "step": 8544 }, { "epoch": 0.6821266065298954, "grad_norm": 0.3450466479334777, "learning_rate": 4.8477848710038754e-06, "loss": 0.1694, "step": 8545 }, { "epoch": 0.6822064341023389, "grad_norm": 0.3186363679538201, "learning_rate": 4.8455691563556574e-06, "loss": 0.1744, "step": 8546 }, { "epoch": 0.6822862616747825, "grad_norm": 0.2687859398397784, "learning_rate": 4.843353786260747e-06, "loss": 0.1905, "step": 8547 }, { "epoch": 0.682366089247226, "grad_norm": 0.3137951964105537, "learning_rate": 4.841138760867226e-06, "loss": 0.1745, "step": 8548 }, { "epoch": 0.6824459168196695, "grad_norm": 0.27346917233891227, "learning_rate": 4.838924080323166e-06, "loss": 0.165, "step": 8549 }, { "epoch": 0.682525744392113, "grad_norm": 0.30738595838303207, "learning_rate": 4.836709744776611e-06, "loss": 0.1564, "step": 8550 }, { "epoch": 0.6826055719645565, "grad_norm": 0.2789100885123396, "learning_rate": 4.834495754375569e-06, "loss": 0.123, "step": 8551 }, { "epoch": 0.682685399537, "grad_norm": 0.2760867785186207, "learning_rate": 4.8322821092680475e-06, "loss": 0.1946, "step": 8552 }, { "epoch": 0.6827652271094437, "grad_norm": 0.31385734095766143, "learning_rate": 4.830068809602011e-06, "loss": 0.1776, "step": 8553 }, { "epoch": 0.6828450546818872, "grad_norm": 0.2909554450822239, "learning_rate": 4.827855855525423e-06, "loss": 0.1272, "step": 8554 }, { "epoch": 0.6829248822543307, "grad_norm": 0.30391949688606007, "learning_rate": 4.8256432471861955e-06, "loss": 0.1775, "step": 8555 }, { "epoch": 0.6830047098267742, "grad_norm": 0.32032995898829636, "learning_rate": 4.823430984732244e-06, "loss": 0.1303, "step": 8556 }, { "epoch": 0.6830845373992177, "grad_norm": 0.25918585282902956, "learning_rate": 4.8212190683114445e-06, "loss": 0.1737, "step": 8557 }, { "epoch": 0.6831643649716612, "grad_norm": 0.30881917189019664, "learning_rate": 4.819007498071656e-06, "loss": 0.1335, "step": 8558 }, { "epoch": 0.6832441925441047, "grad_norm": 0.3059603934818001, "learning_rate": 4.8167962741607135e-06, "loss": 0.1409, "step": 8559 }, { "epoch": 0.6833240201165482, "grad_norm": 0.30861964872163794, "learning_rate": 4.8145853967264235e-06, "loss": 0.1549, "step": 8560 }, { "epoch": 0.6834038476889918, "grad_norm": 0.32796123837719565, "learning_rate": 4.8123748659165825e-06, "loss": 0.1465, "step": 8561 }, { "epoch": 0.6834836752614353, "grad_norm": 0.32610389835979176, "learning_rate": 4.810164681878953e-06, "loss": 0.1519, "step": 8562 }, { "epoch": 0.6835635028338788, "grad_norm": 0.3253366497835228, "learning_rate": 4.807954844761276e-06, "loss": 0.1543, "step": 8563 }, { "epoch": 0.6836433304063223, "grad_norm": 0.2708646504385788, "learning_rate": 4.805745354711268e-06, "loss": 0.1704, "step": 8564 }, { "epoch": 0.6837231579787658, "grad_norm": 0.2962451765531735, "learning_rate": 4.8035362118766274e-06, "loss": 0.1733, "step": 8565 }, { "epoch": 0.6838029855512094, "grad_norm": 0.2918949387598904, "learning_rate": 4.801327416405027e-06, "loss": 0.1289, "step": 8566 }, { "epoch": 0.683882813123653, "grad_norm": 0.28888583982057947, "learning_rate": 4.799118968444118e-06, "loss": 0.1471, "step": 8567 }, { "epoch": 0.6839626406960965, "grad_norm": 0.39246132807284523, "learning_rate": 4.796910868141522e-06, "loss": 0.1531, "step": 8568 }, { "epoch": 0.68404246826854, "grad_norm": 0.30522109320398205, "learning_rate": 4.79470311564484e-06, "loss": 0.1823, "step": 8569 }, { "epoch": 0.6841222958409835, "grad_norm": 0.3200773501755574, "learning_rate": 4.792495711101659e-06, "loss": 0.1329, "step": 8570 }, { "epoch": 0.684202123413427, "grad_norm": 0.32882170257959875, "learning_rate": 4.7902886546595314e-06, "loss": 0.169, "step": 8571 }, { "epoch": 0.6842819509858705, "grad_norm": 0.33652390810447597, "learning_rate": 4.788081946465991e-06, "loss": 0.2221, "step": 8572 }, { "epoch": 0.684361778558314, "grad_norm": 0.3158896061889971, "learning_rate": 4.7858755866685465e-06, "loss": 0.1928, "step": 8573 }, { "epoch": 0.6844416061307576, "grad_norm": 0.31311259981828293, "learning_rate": 4.783669575414681e-06, "loss": 0.1705, "step": 8574 }, { "epoch": 0.6845214337032011, "grad_norm": 0.3323749953347415, "learning_rate": 4.781463912851865e-06, "loss": 0.1282, "step": 8575 }, { "epoch": 0.6846012612756446, "grad_norm": 0.2895992357103372, "learning_rate": 4.779258599127533e-06, "loss": 0.1928, "step": 8576 }, { "epoch": 0.6846810888480881, "grad_norm": 0.35059347821210946, "learning_rate": 4.777053634389104e-06, "loss": 0.1555, "step": 8577 }, { "epoch": 0.6847609164205316, "grad_norm": 0.279369598008141, "learning_rate": 4.774849018783967e-06, "loss": 0.121, "step": 8578 }, { "epoch": 0.6848407439929751, "grad_norm": 0.24770126187393218, "learning_rate": 4.772644752459498e-06, "loss": 0.1498, "step": 8579 }, { "epoch": 0.6849205715654187, "grad_norm": 0.26425383645331085, "learning_rate": 4.770440835563041e-06, "loss": 0.1657, "step": 8580 }, { "epoch": 0.6850003991378623, "grad_norm": 0.313625945432728, "learning_rate": 4.768237268241917e-06, "loss": 0.1777, "step": 8581 }, { "epoch": 0.6850802267103058, "grad_norm": 0.26053733533227336, "learning_rate": 4.7660340506434285e-06, "loss": 0.1199, "step": 8582 }, { "epoch": 0.6851600542827493, "grad_norm": 0.2975534514987722, "learning_rate": 4.763831182914848e-06, "loss": 0.209, "step": 8583 }, { "epoch": 0.6852398818551928, "grad_norm": 0.31963273607344517, "learning_rate": 4.761628665203432e-06, "loss": 0.1754, "step": 8584 }, { "epoch": 0.6853197094276363, "grad_norm": 0.28581607158292327, "learning_rate": 4.759426497656412e-06, "loss": 0.2428, "step": 8585 }, { "epoch": 0.6853995370000798, "grad_norm": 0.2597227565590359, "learning_rate": 4.757224680420991e-06, "loss": 0.1377, "step": 8586 }, { "epoch": 0.6854793645725233, "grad_norm": 0.28563364591833895, "learning_rate": 4.755023213644351e-06, "loss": 0.1761, "step": 8587 }, { "epoch": 0.6855591921449669, "grad_norm": 0.3287718692848102, "learning_rate": 4.7528220974736494e-06, "loss": 0.1827, "step": 8588 }, { "epoch": 0.6856390197174104, "grad_norm": 0.3007202272169316, "learning_rate": 4.7506213320560335e-06, "loss": 0.1584, "step": 8589 }, { "epoch": 0.6857188472898539, "grad_norm": 0.30098221860346475, "learning_rate": 4.7484209175386e-06, "loss": 0.1427, "step": 8590 }, { "epoch": 0.6857986748622974, "grad_norm": 0.28559844248122723, "learning_rate": 4.746220854068449e-06, "loss": 0.1608, "step": 8591 }, { "epoch": 0.6858785024347409, "grad_norm": 0.3302751471895253, "learning_rate": 4.7440211417926375e-06, "loss": 0.1552, "step": 8592 }, { "epoch": 0.6859583300071845, "grad_norm": 0.3033435895760666, "learning_rate": 4.741821780858221e-06, "loss": 0.1935, "step": 8593 }, { "epoch": 0.686038157579628, "grad_norm": 0.2886398535088996, "learning_rate": 4.739622771412205e-06, "loss": 0.1334, "step": 8594 }, { "epoch": 0.6861179851520716, "grad_norm": 0.2913691678404063, "learning_rate": 4.737424113601588e-06, "loss": 0.1747, "step": 8595 }, { "epoch": 0.6861978127245151, "grad_norm": 0.31465670967708476, "learning_rate": 4.735225807573345e-06, "loss": 0.1596, "step": 8596 }, { "epoch": 0.6862776402969586, "grad_norm": 0.31634505102060057, "learning_rate": 4.733027853474417e-06, "loss": 0.1671, "step": 8597 }, { "epoch": 0.6863574678694021, "grad_norm": 0.26697448914345606, "learning_rate": 4.730830251451742e-06, "loss": 0.1673, "step": 8598 }, { "epoch": 0.6864372954418456, "grad_norm": 0.2804306460759436, "learning_rate": 4.728633001652204e-06, "loss": 0.1813, "step": 8599 }, { "epoch": 0.6865171230142891, "grad_norm": 0.2988071813194847, "learning_rate": 4.726436104222693e-06, "loss": 0.1707, "step": 8600 }, { "epoch": 0.6865969505867326, "grad_norm": 0.31685758818371174, "learning_rate": 4.7242395593100594e-06, "loss": 0.112, "step": 8601 }, { "epoch": 0.6866767781591762, "grad_norm": 0.25905924703543753, "learning_rate": 4.722043367061133e-06, "loss": 0.1717, "step": 8602 }, { "epoch": 0.6867566057316197, "grad_norm": 0.31999047424242305, "learning_rate": 4.719847527622719e-06, "loss": 0.1517, "step": 8603 }, { "epoch": 0.6868364333040632, "grad_norm": 0.33039002411885904, "learning_rate": 4.717652041141599e-06, "loss": 0.1673, "step": 8604 }, { "epoch": 0.6869162608765067, "grad_norm": 0.2624021228448817, "learning_rate": 4.715456907764539e-06, "loss": 0.1502, "step": 8605 }, { "epoch": 0.6869960884489502, "grad_norm": 0.25296357655987683, "learning_rate": 4.713262127638273e-06, "loss": 0.1399, "step": 8606 }, { "epoch": 0.6870759160213938, "grad_norm": 0.2549916782797054, "learning_rate": 4.711067700909511e-06, "loss": 0.1441, "step": 8607 }, { "epoch": 0.6871557435938374, "grad_norm": 0.280318986929208, "learning_rate": 4.708873627724938e-06, "loss": 0.1703, "step": 8608 }, { "epoch": 0.6872355711662809, "grad_norm": 0.30189500777742523, "learning_rate": 4.706679908231229e-06, "loss": 0.142, "step": 8609 }, { "epoch": 0.6873153987387244, "grad_norm": 0.338379950655916, "learning_rate": 4.704486542575019e-06, "loss": 0.1588, "step": 8610 }, { "epoch": 0.6873952263111679, "grad_norm": 0.2723508644803642, "learning_rate": 4.702293530902927e-06, "loss": 0.1677, "step": 8611 }, { "epoch": 0.6874750538836114, "grad_norm": 0.2851238830661648, "learning_rate": 4.700100873361549e-06, "loss": 0.1559, "step": 8612 }, { "epoch": 0.6875548814560549, "grad_norm": 0.3269127462045986, "learning_rate": 4.697908570097448e-06, "loss": 0.1992, "step": 8613 }, { "epoch": 0.6876347090284984, "grad_norm": 0.2849079640272488, "learning_rate": 4.69571662125718e-06, "loss": 0.1677, "step": 8614 }, { "epoch": 0.687714536600942, "grad_norm": 0.34427192316603544, "learning_rate": 4.693525026987266e-06, "loss": 0.1475, "step": 8615 }, { "epoch": 0.6877943641733855, "grad_norm": 0.26613144247000753, "learning_rate": 4.691333787434204e-06, "loss": 0.166, "step": 8616 }, { "epoch": 0.687874191745829, "grad_norm": 0.279579640465013, "learning_rate": 4.689142902744469e-06, "loss": 0.151, "step": 8617 }, { "epoch": 0.6879540193182725, "grad_norm": 0.30904841318630366, "learning_rate": 4.686952373064509e-06, "loss": 0.1264, "step": 8618 }, { "epoch": 0.688033846890716, "grad_norm": 0.28090250136146033, "learning_rate": 4.684762198540762e-06, "loss": 0.1412, "step": 8619 }, { "epoch": 0.6881136744631596, "grad_norm": 0.25341209208395393, "learning_rate": 4.682572379319628e-06, "loss": 0.169, "step": 8620 }, { "epoch": 0.6881935020356031, "grad_norm": 0.2684912865819158, "learning_rate": 4.680382915547486e-06, "loss": 0.1783, "step": 8621 }, { "epoch": 0.6882733296080467, "grad_norm": 0.3309567882700401, "learning_rate": 4.67819380737069e-06, "loss": 0.1108, "step": 8622 }, { "epoch": 0.6883531571804902, "grad_norm": 0.2884476064566218, "learning_rate": 4.6760050549355815e-06, "loss": 0.1788, "step": 8623 }, { "epoch": 0.6884329847529337, "grad_norm": 0.3379970175612652, "learning_rate": 4.6738166583884655e-06, "loss": 0.1764, "step": 8624 }, { "epoch": 0.6885128123253772, "grad_norm": 0.2880165557764968, "learning_rate": 4.671628617875629e-06, "loss": 0.1627, "step": 8625 }, { "epoch": 0.6885926398978207, "grad_norm": 0.3001712916951849, "learning_rate": 4.6694409335433325e-06, "loss": 0.1966, "step": 8626 }, { "epoch": 0.6886724674702642, "grad_norm": 0.3412358040341381, "learning_rate": 4.6672536055378095e-06, "loss": 0.143, "step": 8627 }, { "epoch": 0.6887522950427077, "grad_norm": 0.30498072725876524, "learning_rate": 4.665066634005283e-06, "loss": 0.1479, "step": 8628 }, { "epoch": 0.6888321226151513, "grad_norm": 0.2686009754019173, "learning_rate": 4.66288001909194e-06, "loss": 0.1546, "step": 8629 }, { "epoch": 0.6889119501875948, "grad_norm": 0.2751546013679183, "learning_rate": 4.6606937609439466e-06, "loss": 0.1401, "step": 8630 }, { "epoch": 0.6889917777600383, "grad_norm": 0.268609437296589, "learning_rate": 4.6585078597074455e-06, "loss": 0.1369, "step": 8631 }, { "epoch": 0.6890716053324818, "grad_norm": 0.3005707811075956, "learning_rate": 4.656322315528551e-06, "loss": 0.1651, "step": 8632 }, { "epoch": 0.6891514329049253, "grad_norm": 0.25720623714743707, "learning_rate": 4.654137128553371e-06, "loss": 0.1741, "step": 8633 }, { "epoch": 0.6892312604773689, "grad_norm": 0.298019868948948, "learning_rate": 4.651952298927959e-06, "loss": 0.1436, "step": 8634 }, { "epoch": 0.6893110880498124, "grad_norm": 0.267953969128415, "learning_rate": 4.649767826798377e-06, "loss": 0.1684, "step": 8635 }, { "epoch": 0.689390915622256, "grad_norm": 0.28153555183681106, "learning_rate": 4.647583712310637e-06, "loss": 0.1796, "step": 8636 }, { "epoch": 0.6894707431946995, "grad_norm": 0.3275133193363329, "learning_rate": 4.6453999556107535e-06, "loss": 0.0966, "step": 8637 }, { "epoch": 0.689550570767143, "grad_norm": 0.27595584927606465, "learning_rate": 4.643216556844684e-06, "loss": 0.1509, "step": 8638 }, { "epoch": 0.6896303983395865, "grad_norm": 0.32235412345833314, "learning_rate": 4.641033516158392e-06, "loss": 0.188, "step": 8639 }, { "epoch": 0.68971022591203, "grad_norm": 0.2629276393061171, "learning_rate": 4.638850833697802e-06, "loss": 0.1339, "step": 8640 }, { "epoch": 0.6897900534844735, "grad_norm": 0.28649105353762017, "learning_rate": 4.636668509608813e-06, "loss": 0.1517, "step": 8641 }, { "epoch": 0.689869881056917, "grad_norm": 0.29986848380694164, "learning_rate": 4.634486544037319e-06, "loss": 0.157, "step": 8642 }, { "epoch": 0.6899497086293606, "grad_norm": 0.28531620859486545, "learning_rate": 4.632304937129157e-06, "loss": 0.1401, "step": 8643 }, { "epoch": 0.6900295362018041, "grad_norm": 0.2860018155063402, "learning_rate": 4.630123689030171e-06, "loss": 0.1626, "step": 8644 }, { "epoch": 0.6901093637742476, "grad_norm": 0.24688166741047868, "learning_rate": 4.627942799886163e-06, "loss": 0.1508, "step": 8645 }, { "epoch": 0.6901891913466911, "grad_norm": 0.29585211654449317, "learning_rate": 4.625762269842926e-06, "loss": 0.1753, "step": 8646 }, { "epoch": 0.6902690189191346, "grad_norm": 0.3038833202194416, "learning_rate": 4.623582099046209e-06, "loss": 0.1513, "step": 8647 }, { "epoch": 0.6903488464915782, "grad_norm": 0.2717876708244926, "learning_rate": 4.6214022876417484e-06, "loss": 0.1991, "step": 8648 }, { "epoch": 0.6904286740640218, "grad_norm": 0.32292778690493174, "learning_rate": 4.6192228357752635e-06, "loss": 0.1817, "step": 8649 }, { "epoch": 0.6905085016364653, "grad_norm": 0.24277011736877005, "learning_rate": 4.6170437435924385e-06, "loss": 0.1384, "step": 8650 }, { "epoch": 0.6905883292089088, "grad_norm": 0.31752093971579315, "learning_rate": 4.614865011238936e-06, "loss": 0.2038, "step": 8651 }, { "epoch": 0.6906681567813523, "grad_norm": 0.27425424983862906, "learning_rate": 4.612686638860392e-06, "loss": 0.1667, "step": 8652 }, { "epoch": 0.6907479843537958, "grad_norm": 0.3110508917637237, "learning_rate": 4.610508626602431e-06, "loss": 0.1508, "step": 8653 }, { "epoch": 0.6908278119262393, "grad_norm": 0.28016715269845704, "learning_rate": 4.60833097461064e-06, "loss": 0.2251, "step": 8654 }, { "epoch": 0.6909076394986828, "grad_norm": 0.29730016573963075, "learning_rate": 4.606153683030585e-06, "loss": 0.1856, "step": 8655 }, { "epoch": 0.6909874670711263, "grad_norm": 0.323510975078991, "learning_rate": 4.603976752007811e-06, "loss": 0.1401, "step": 8656 }, { "epoch": 0.6910672946435699, "grad_norm": 0.27969846834234224, "learning_rate": 4.601800181687833e-06, "loss": 0.1353, "step": 8657 }, { "epoch": 0.6911471222160134, "grad_norm": 0.3013863848774205, "learning_rate": 4.599623972216155e-06, "loss": 0.1788, "step": 8658 }, { "epoch": 0.6912269497884569, "grad_norm": 0.3142132307313109, "learning_rate": 4.597448123738242e-06, "loss": 0.1383, "step": 8659 }, { "epoch": 0.6913067773609004, "grad_norm": 0.3104510137362358, "learning_rate": 4.595272636399541e-06, "loss": 0.2036, "step": 8660 }, { "epoch": 0.691386604933344, "grad_norm": 0.26693178262037676, "learning_rate": 4.5930975103454766e-06, "loss": 0.1464, "step": 8661 }, { "epoch": 0.6914664325057875, "grad_norm": 0.24961706167158965, "learning_rate": 4.590922745721441e-06, "loss": 0.1552, "step": 8662 }, { "epoch": 0.6915462600782311, "grad_norm": 0.2767944325484257, "learning_rate": 4.5887483426728195e-06, "loss": 0.2211, "step": 8663 }, { "epoch": 0.6916260876506746, "grad_norm": 0.3340150755891809, "learning_rate": 4.586574301344955e-06, "loss": 0.1813, "step": 8664 }, { "epoch": 0.6917059152231181, "grad_norm": 0.3086172542492666, "learning_rate": 4.584400621883176e-06, "loss": 0.1678, "step": 8665 }, { "epoch": 0.6917857427955616, "grad_norm": 0.27185879760519743, "learning_rate": 4.58222730443278e-06, "loss": 0.1942, "step": 8666 }, { "epoch": 0.6918655703680051, "grad_norm": 0.34486657132294535, "learning_rate": 4.580054349139052e-06, "loss": 0.1886, "step": 8667 }, { "epoch": 0.6919453979404486, "grad_norm": 0.28824499012393273, "learning_rate": 4.577881756147242e-06, "loss": 0.1748, "step": 8668 }, { "epoch": 0.6920252255128921, "grad_norm": 0.2706935576434971, "learning_rate": 4.575709525602578e-06, "loss": 0.178, "step": 8669 }, { "epoch": 0.6921050530853357, "grad_norm": 0.28083660164677304, "learning_rate": 4.573537657650266e-06, "loss": 0.1889, "step": 8670 }, { "epoch": 0.6921848806577792, "grad_norm": 0.26529517408272724, "learning_rate": 4.571366152435485e-06, "loss": 0.1774, "step": 8671 }, { "epoch": 0.6922647082302227, "grad_norm": 0.26751944313259757, "learning_rate": 4.569195010103395e-06, "loss": 0.1512, "step": 8672 }, { "epoch": 0.6923445358026662, "grad_norm": 0.28606909397143215, "learning_rate": 4.567024230799128e-06, "loss": 0.1547, "step": 8673 }, { "epoch": 0.6924243633751097, "grad_norm": 0.31453940737291747, "learning_rate": 4.5648538146677886e-06, "loss": 0.2114, "step": 8674 }, { "epoch": 0.6925041909475533, "grad_norm": 0.3122462722673202, "learning_rate": 4.562683761854461e-06, "loss": 0.1409, "step": 8675 }, { "epoch": 0.6925840185199968, "grad_norm": 0.28326353100005147, "learning_rate": 4.560514072504208e-06, "loss": 0.1556, "step": 8676 }, { "epoch": 0.6926638460924404, "grad_norm": 0.3064414403282219, "learning_rate": 4.558344746762069e-06, "loss": 0.1382, "step": 8677 }, { "epoch": 0.6927436736648839, "grad_norm": 0.2913136018491997, "learning_rate": 4.5561757847730395e-06, "loss": 0.1771, "step": 8678 }, { "epoch": 0.6928235012373274, "grad_norm": 0.33802257262052776, "learning_rate": 4.5540071866821186e-06, "loss": 0.1973, "step": 8679 }, { "epoch": 0.6929033288097709, "grad_norm": 0.28713791530770016, "learning_rate": 4.551838952634263e-06, "loss": 0.1318, "step": 8680 }, { "epoch": 0.6929831563822144, "grad_norm": 0.37754083404358957, "learning_rate": 4.549671082774419e-06, "loss": 0.1629, "step": 8681 }, { "epoch": 0.6930629839546579, "grad_norm": 0.26128175581898394, "learning_rate": 4.547503577247488e-06, "loss": 0.1346, "step": 8682 }, { "epoch": 0.6931428115271014, "grad_norm": 0.27364751455380076, "learning_rate": 4.545336436198367e-06, "loss": 0.1659, "step": 8683 }, { "epoch": 0.693222639099545, "grad_norm": 0.30357448592678016, "learning_rate": 4.543169659771919e-06, "loss": 0.1775, "step": 8684 }, { "epoch": 0.6933024666719885, "grad_norm": 0.29548306932464063, "learning_rate": 4.541003248112985e-06, "loss": 0.1568, "step": 8685 }, { "epoch": 0.693382294244432, "grad_norm": 0.3295448819555408, "learning_rate": 4.538837201366379e-06, "loss": 0.1455, "step": 8686 }, { "epoch": 0.6934621218168755, "grad_norm": 0.2841140438672646, "learning_rate": 4.536671519676892e-06, "loss": 0.1909, "step": 8687 }, { "epoch": 0.6935419493893191, "grad_norm": 0.2786241181599538, "learning_rate": 4.534506203189296e-06, "loss": 0.13, "step": 8688 }, { "epoch": 0.6936217769617626, "grad_norm": 0.2725888061863357, "learning_rate": 4.532341252048328e-06, "loss": 0.1587, "step": 8689 }, { "epoch": 0.6937016045342062, "grad_norm": 0.27956652331752097, "learning_rate": 4.530176666398717e-06, "loss": 0.2021, "step": 8690 }, { "epoch": 0.6937814321066497, "grad_norm": 0.2916069466938498, "learning_rate": 4.528012446385145e-06, "loss": 0.1455, "step": 8691 }, { "epoch": 0.6938612596790932, "grad_norm": 0.2632446142854999, "learning_rate": 4.525848592152282e-06, "loss": 0.1716, "step": 8692 }, { "epoch": 0.6939410872515367, "grad_norm": 0.30542121808041006, "learning_rate": 4.523685103844782e-06, "loss": 0.1797, "step": 8693 }, { "epoch": 0.6940209148239802, "grad_norm": 0.3273482769980246, "learning_rate": 4.521521981607259e-06, "loss": 0.1481, "step": 8694 }, { "epoch": 0.6941007423964237, "grad_norm": 0.2793140913501022, "learning_rate": 4.519359225584312e-06, "loss": 0.1755, "step": 8695 }, { "epoch": 0.6941805699688672, "grad_norm": 0.3120225972597886, "learning_rate": 4.517196835920507e-06, "loss": 0.1567, "step": 8696 }, { "epoch": 0.6942603975413107, "grad_norm": 0.29579993288974277, "learning_rate": 4.515034812760401e-06, "loss": 0.2473, "step": 8697 }, { "epoch": 0.6943402251137543, "grad_norm": 0.31056474518254434, "learning_rate": 4.512873156248511e-06, "loss": 0.1646, "step": 8698 }, { "epoch": 0.6944200526861978, "grad_norm": 0.28818527959696266, "learning_rate": 4.510711866529335e-06, "loss": 0.1644, "step": 8699 }, { "epoch": 0.6944998802586413, "grad_norm": 0.28215067316590525, "learning_rate": 4.508550943747347e-06, "loss": 0.2257, "step": 8700 }, { "epoch": 0.6945797078310848, "grad_norm": 0.28947633824236074, "learning_rate": 4.5063903880469926e-06, "loss": 0.1602, "step": 8701 }, { "epoch": 0.6946595354035284, "grad_norm": 0.2765971494900004, "learning_rate": 4.504230199572705e-06, "loss": 0.1437, "step": 8702 }, { "epoch": 0.6947393629759719, "grad_norm": 0.264986103892447, "learning_rate": 4.502070378468879e-06, "loss": 0.1417, "step": 8703 }, { "epoch": 0.6948191905484155, "grad_norm": 0.33700866354110137, "learning_rate": 4.499910924879891e-06, "loss": 0.1176, "step": 8704 }, { "epoch": 0.694899018120859, "grad_norm": 0.2698040822053028, "learning_rate": 4.4977518389500876e-06, "loss": 0.1574, "step": 8705 }, { "epoch": 0.6949788456933025, "grad_norm": 0.295085317333565, "learning_rate": 4.4955931208238025e-06, "loss": 0.1337, "step": 8706 }, { "epoch": 0.695058673265746, "grad_norm": 0.27891152280780024, "learning_rate": 4.493434770645334e-06, "loss": 0.1624, "step": 8707 }, { "epoch": 0.6951385008381895, "grad_norm": 0.2837932494067338, "learning_rate": 4.491276788558959e-06, "loss": 0.1812, "step": 8708 }, { "epoch": 0.695218328410633, "grad_norm": 0.27277109452795445, "learning_rate": 4.489119174708931e-06, "loss": 0.143, "step": 8709 }, { "epoch": 0.6952981559830765, "grad_norm": 0.2665557740695765, "learning_rate": 4.486961929239473e-06, "loss": 0.1567, "step": 8710 }, { "epoch": 0.69537798355552, "grad_norm": 0.3247331363413694, "learning_rate": 4.484805052294797e-06, "loss": 0.1509, "step": 8711 }, { "epoch": 0.6954578111279636, "grad_norm": 0.2682495002995182, "learning_rate": 4.482648544019077e-06, "loss": 0.1801, "step": 8712 }, { "epoch": 0.6955376387004071, "grad_norm": 0.30355610522099075, "learning_rate": 4.4804924045564664e-06, "loss": 0.1694, "step": 8713 }, { "epoch": 0.6956174662728506, "grad_norm": 0.32392082838084124, "learning_rate": 4.478336634051097e-06, "loss": 0.1506, "step": 8714 }, { "epoch": 0.6956972938452942, "grad_norm": 0.2973638492910982, "learning_rate": 4.476181232647068e-06, "loss": 0.2065, "step": 8715 }, { "epoch": 0.6957771214177377, "grad_norm": 0.31022369260749183, "learning_rate": 4.474026200488466e-06, "loss": 0.17, "step": 8716 }, { "epoch": 0.6958569489901812, "grad_norm": 0.29154084464629937, "learning_rate": 4.471871537719345e-06, "loss": 0.1518, "step": 8717 }, { "epoch": 0.6959367765626248, "grad_norm": 0.3068720262758664, "learning_rate": 4.469717244483735e-06, "loss": 0.1655, "step": 8718 }, { "epoch": 0.6960166041350683, "grad_norm": 0.2687983746166673, "learning_rate": 4.467563320925636e-06, "loss": 0.1997, "step": 8719 }, { "epoch": 0.6960964317075118, "grad_norm": 0.2959932910669444, "learning_rate": 4.465409767189041e-06, "loss": 0.1439, "step": 8720 }, { "epoch": 0.6961762592799553, "grad_norm": 0.2693771614352752, "learning_rate": 4.463256583417902e-06, "loss": 0.182, "step": 8721 }, { "epoch": 0.6962560868523988, "grad_norm": 0.29275425607658934, "learning_rate": 4.461103769756143e-06, "loss": 0.1825, "step": 8722 }, { "epoch": 0.6963359144248423, "grad_norm": 0.2751670261334112, "learning_rate": 4.458951326347681e-06, "loss": 0.1562, "step": 8723 }, { "epoch": 0.6964157419972858, "grad_norm": 0.27786823148229395, "learning_rate": 4.45679925333639e-06, "loss": 0.163, "step": 8724 }, { "epoch": 0.6964955695697294, "grad_norm": 0.3063108386302478, "learning_rate": 4.454647550866139e-06, "loss": 0.1324, "step": 8725 }, { "epoch": 0.6965753971421729, "grad_norm": 0.30856418017932385, "learning_rate": 4.452496219080748e-06, "loss": 0.1461, "step": 8726 }, { "epoch": 0.6966552247146164, "grad_norm": 0.29189855375014645, "learning_rate": 4.450345258124034e-06, "loss": 0.2098, "step": 8727 }, { "epoch": 0.6967350522870599, "grad_norm": 0.36094634120144287, "learning_rate": 4.448194668139777e-06, "loss": 0.128, "step": 8728 }, { "epoch": 0.6968148798595035, "grad_norm": 0.31235782627552294, "learning_rate": 4.446044449271736e-06, "loss": 0.1638, "step": 8729 }, { "epoch": 0.696894707431947, "grad_norm": 0.2615797122288546, "learning_rate": 4.4438946016636445e-06, "loss": 0.1895, "step": 8730 }, { "epoch": 0.6969745350043905, "grad_norm": 0.28989029554109924, "learning_rate": 4.441745125459206e-06, "loss": 0.1669, "step": 8731 }, { "epoch": 0.6970543625768341, "grad_norm": 0.3283327827317932, "learning_rate": 4.439596020802115e-06, "loss": 0.1382, "step": 8732 }, { "epoch": 0.6971341901492776, "grad_norm": 0.317753555046989, "learning_rate": 4.43744728783602e-06, "loss": 0.1296, "step": 8733 }, { "epoch": 0.6972140177217211, "grad_norm": 0.28055725274354537, "learning_rate": 4.435298926704569e-06, "loss": 0.1452, "step": 8734 }, { "epoch": 0.6972938452941646, "grad_norm": 0.3586667111791749, "learning_rate": 4.433150937551358e-06, "loss": 0.1816, "step": 8735 }, { "epoch": 0.6973736728666081, "grad_norm": 0.30916641110220705, "learning_rate": 4.431003320519973e-06, "loss": 0.1675, "step": 8736 }, { "epoch": 0.6974535004390516, "grad_norm": 0.2824899444044057, "learning_rate": 4.428856075753981e-06, "loss": 0.1615, "step": 8737 }, { "epoch": 0.6975333280114951, "grad_norm": 0.3223474018427938, "learning_rate": 4.426709203396913e-06, "loss": 0.1615, "step": 8738 }, { "epoch": 0.6976131555839387, "grad_norm": 0.2705236800342658, "learning_rate": 4.424562703592279e-06, "loss": 0.1313, "step": 8739 }, { "epoch": 0.6976929831563822, "grad_norm": 0.26589920273503487, "learning_rate": 4.42241657648356e-06, "loss": 0.1579, "step": 8740 }, { "epoch": 0.6977728107288257, "grad_norm": 0.2785849956349606, "learning_rate": 4.420270822214224e-06, "loss": 0.1707, "step": 8741 }, { "epoch": 0.6978526383012693, "grad_norm": 0.2906156281378789, "learning_rate": 4.418125440927702e-06, "loss": 0.1114, "step": 8742 }, { "epoch": 0.6979324658737128, "grad_norm": 0.31081425301252225, "learning_rate": 4.415980432767405e-06, "loss": 0.1921, "step": 8743 }, { "epoch": 0.6980122934461563, "grad_norm": 0.2621973313585583, "learning_rate": 4.4138357978767165e-06, "loss": 0.2221, "step": 8744 }, { "epoch": 0.6980921210185999, "grad_norm": 0.26095281866966646, "learning_rate": 4.411691536398995e-06, "loss": 0.2037, "step": 8745 }, { "epoch": 0.6981719485910434, "grad_norm": 0.30175867384003585, "learning_rate": 4.409547648477583e-06, "loss": 0.1918, "step": 8746 }, { "epoch": 0.6982517761634869, "grad_norm": 0.26672241933913465, "learning_rate": 4.407404134255787e-06, "loss": 0.1367, "step": 8747 }, { "epoch": 0.6983316037359304, "grad_norm": 0.31917426884146977, "learning_rate": 4.405260993876892e-06, "loss": 0.1494, "step": 8748 }, { "epoch": 0.6984114313083739, "grad_norm": 0.2943856096869159, "learning_rate": 4.403118227484155e-06, "loss": 0.178, "step": 8749 }, { "epoch": 0.6984912588808174, "grad_norm": 0.3817694155541676, "learning_rate": 4.40097583522082e-06, "loss": 0.1741, "step": 8750 }, { "epoch": 0.6985710864532609, "grad_norm": 0.3133746481821846, "learning_rate": 4.398833817230092e-06, "loss": 0.1566, "step": 8751 }, { "epoch": 0.6986509140257045, "grad_norm": 0.30133451774801767, "learning_rate": 4.396692173655157e-06, "loss": 0.1988, "step": 8752 }, { "epoch": 0.698730741598148, "grad_norm": 0.25254096784939023, "learning_rate": 4.394550904639175e-06, "loss": 0.178, "step": 8753 }, { "epoch": 0.6988105691705915, "grad_norm": 0.26921161196634674, "learning_rate": 4.392410010325279e-06, "loss": 0.1474, "step": 8754 }, { "epoch": 0.698890396743035, "grad_norm": 0.2633871220925403, "learning_rate": 4.390269490856586e-06, "loss": 0.1537, "step": 8755 }, { "epoch": 0.6989702243154786, "grad_norm": 0.27577484313388656, "learning_rate": 4.388129346376177e-06, "loss": 0.1825, "step": 8756 }, { "epoch": 0.6990500518879221, "grad_norm": 0.28605619428843326, "learning_rate": 4.385989577027113e-06, "loss": 0.1778, "step": 8757 }, { "epoch": 0.6991298794603656, "grad_norm": 0.2852858709150306, "learning_rate": 4.383850182952428e-06, "loss": 0.148, "step": 8758 }, { "epoch": 0.6992097070328092, "grad_norm": 0.33108831926239335, "learning_rate": 4.38171116429513e-06, "loss": 0.1465, "step": 8759 }, { "epoch": 0.6992895346052527, "grad_norm": 0.2782683245929548, "learning_rate": 4.37957252119821e-06, "loss": 0.1132, "step": 8760 }, { "epoch": 0.6993693621776962, "grad_norm": 0.324124672090754, "learning_rate": 4.377434253804626e-06, "loss": 0.1339, "step": 8761 }, { "epoch": 0.6994491897501397, "grad_norm": 0.3181147792033584, "learning_rate": 4.375296362257309e-06, "loss": 0.1812, "step": 8762 }, { "epoch": 0.6995290173225832, "grad_norm": 0.2790300885591227, "learning_rate": 4.373158846699168e-06, "loss": 0.1738, "step": 8763 }, { "epoch": 0.6996088448950267, "grad_norm": 0.2875502511773792, "learning_rate": 4.371021707273095e-06, "loss": 0.1871, "step": 8764 }, { "epoch": 0.6996886724674702, "grad_norm": 0.27598507211664375, "learning_rate": 4.368884944121946e-06, "loss": 0.1584, "step": 8765 }, { "epoch": 0.6997685000399138, "grad_norm": 0.3559033710907851, "learning_rate": 4.366748557388548e-06, "loss": 0.1489, "step": 8766 }, { "epoch": 0.6998483276123573, "grad_norm": 0.31299477637048534, "learning_rate": 4.364612547215719e-06, "loss": 0.1498, "step": 8767 }, { "epoch": 0.6999281551848008, "grad_norm": 0.31541840738186744, "learning_rate": 4.362476913746237e-06, "loss": 0.1439, "step": 8768 }, { "epoch": 0.7000079827572444, "grad_norm": 0.28614792228379976, "learning_rate": 4.36034165712287e-06, "loss": 0.1546, "step": 8769 }, { "epoch": 0.7000878103296879, "grad_norm": 0.3036188889692148, "learning_rate": 4.358206777488336e-06, "loss": 0.1602, "step": 8770 }, { "epoch": 0.7001676379021314, "grad_norm": 0.3066045191987089, "learning_rate": 4.356072274985358e-06, "loss": 0.1528, "step": 8771 }, { "epoch": 0.700247465474575, "grad_norm": 0.2630912217601618, "learning_rate": 4.353938149756608e-06, "loss": 0.1621, "step": 8772 }, { "epoch": 0.7003272930470185, "grad_norm": 0.2539966582558762, "learning_rate": 4.351804401944756e-06, "loss": 0.1244, "step": 8773 }, { "epoch": 0.700407120619462, "grad_norm": 0.3335169067713057, "learning_rate": 4.349671031692424e-06, "loss": 0.2067, "step": 8774 }, { "epoch": 0.7004869481919055, "grad_norm": 0.3150466306870647, "learning_rate": 4.347538039142219e-06, "loss": 0.1887, "step": 8775 }, { "epoch": 0.700566775764349, "grad_norm": 0.2745875134673676, "learning_rate": 4.345405424436732e-06, "loss": 0.1119, "step": 8776 }, { "epoch": 0.7006466033367925, "grad_norm": 0.28033668465227185, "learning_rate": 4.343273187718513e-06, "loss": 0.1334, "step": 8777 }, { "epoch": 0.700726430909236, "grad_norm": 0.26106012975699827, "learning_rate": 4.341141329130095e-06, "loss": 0.1822, "step": 8778 }, { "epoch": 0.7008062584816795, "grad_norm": 0.30468010879744173, "learning_rate": 4.339009848813983e-06, "loss": 0.1437, "step": 8779 }, { "epoch": 0.7008860860541231, "grad_norm": 0.31171768435542196, "learning_rate": 4.336878746912662e-06, "loss": 0.1237, "step": 8780 }, { "epoch": 0.7009659136265666, "grad_norm": 0.26720835974599083, "learning_rate": 4.334748023568586e-06, "loss": 0.1413, "step": 8781 }, { "epoch": 0.7010457411990101, "grad_norm": 0.26418615409205104, "learning_rate": 4.332617678924184e-06, "loss": 0.1597, "step": 8782 }, { "epoch": 0.7011255687714537, "grad_norm": 0.31858908333399005, "learning_rate": 4.330487713121863e-06, "loss": 0.1877, "step": 8783 }, { "epoch": 0.7012053963438972, "grad_norm": 0.2934957927491758, "learning_rate": 4.328358126303997e-06, "loss": 0.1681, "step": 8784 }, { "epoch": 0.7012852239163407, "grad_norm": 0.33835724594655375, "learning_rate": 4.326228918612949e-06, "loss": 0.1398, "step": 8785 }, { "epoch": 0.7013650514887843, "grad_norm": 0.27065266764041396, "learning_rate": 4.324100090191045e-06, "loss": 0.1681, "step": 8786 }, { "epoch": 0.7014448790612278, "grad_norm": 0.2795759363971893, "learning_rate": 4.321971641180588e-06, "loss": 0.1516, "step": 8787 }, { "epoch": 0.7015247066336713, "grad_norm": 0.3348365272171688, "learning_rate": 4.319843571723855e-06, "loss": 0.1678, "step": 8788 }, { "epoch": 0.7016045342061148, "grad_norm": 0.2845385937776781, "learning_rate": 4.317715881963098e-06, "loss": 0.1566, "step": 8789 }, { "epoch": 0.7016843617785583, "grad_norm": 0.2944615795259304, "learning_rate": 4.315588572040551e-06, "loss": 0.165, "step": 8790 }, { "epoch": 0.7017641893510018, "grad_norm": 0.278383600489838, "learning_rate": 4.313461642098412e-06, "loss": 0.1381, "step": 8791 }, { "epoch": 0.7018440169234453, "grad_norm": 0.2813950468913727, "learning_rate": 4.311335092278858e-06, "loss": 0.1488, "step": 8792 }, { "epoch": 0.7019238444958888, "grad_norm": 0.30612153902016354, "learning_rate": 4.309208922724037e-06, "loss": 0.1909, "step": 8793 }, { "epoch": 0.7020036720683324, "grad_norm": 0.29071905633320566, "learning_rate": 4.307083133576082e-06, "loss": 0.1682, "step": 8794 }, { "epoch": 0.7020834996407759, "grad_norm": 0.2934670497387195, "learning_rate": 4.304957724977091e-06, "loss": 0.1274, "step": 8795 }, { "epoch": 0.7021633272132195, "grad_norm": 0.339127468704049, "learning_rate": 4.302832697069138e-06, "loss": 0.1703, "step": 8796 }, { "epoch": 0.702243154785663, "grad_norm": 0.27639783497444964, "learning_rate": 4.3007080499942736e-06, "loss": 0.2126, "step": 8797 }, { "epoch": 0.7023229823581065, "grad_norm": 0.28786853378620536, "learning_rate": 4.2985837838945176e-06, "loss": 0.1918, "step": 8798 }, { "epoch": 0.70240280993055, "grad_norm": 0.3159256191349339, "learning_rate": 4.296459898911877e-06, "loss": 0.1954, "step": 8799 }, { "epoch": 0.7024826375029936, "grad_norm": 0.34019944115456563, "learning_rate": 4.294336395188319e-06, "loss": 0.1402, "step": 8800 }, { "epoch": 0.7025624650754371, "grad_norm": 0.3181124968644968, "learning_rate": 4.292213272865795e-06, "loss": 0.1335, "step": 8801 }, { "epoch": 0.7026422926478806, "grad_norm": 0.2498909442490753, "learning_rate": 4.290090532086224e-06, "loss": 0.1574, "step": 8802 }, { "epoch": 0.7027221202203241, "grad_norm": 0.3540323743359365, "learning_rate": 4.2879681729915025e-06, "loss": 0.151, "step": 8803 }, { "epoch": 0.7028019477927676, "grad_norm": 0.28000080722577675, "learning_rate": 4.28584619572351e-06, "loss": 0.154, "step": 8804 }, { "epoch": 0.7028817753652111, "grad_norm": 0.28100618646288145, "learning_rate": 4.2837246004240785e-06, "loss": 0.1985, "step": 8805 }, { "epoch": 0.7029616029376546, "grad_norm": 0.28318045993208096, "learning_rate": 4.28160338723504e-06, "loss": 0.1996, "step": 8806 }, { "epoch": 0.7030414305100982, "grad_norm": 0.28847615754329287, "learning_rate": 4.279482556298181e-06, "loss": 0.1399, "step": 8807 }, { "epoch": 0.7031212580825417, "grad_norm": 0.27586054688422995, "learning_rate": 4.277362107755279e-06, "loss": 0.1757, "step": 8808 }, { "epoch": 0.7032010856549852, "grad_norm": 0.26764158781484765, "learning_rate": 4.275242041748072e-06, "loss": 0.201, "step": 8809 }, { "epoch": 0.7032809132274288, "grad_norm": 0.2820588369377074, "learning_rate": 4.27312235841828e-06, "loss": 0.1492, "step": 8810 }, { "epoch": 0.7033607407998723, "grad_norm": 0.32787335250841887, "learning_rate": 4.271003057907595e-06, "loss": 0.2021, "step": 8811 }, { "epoch": 0.7034405683723158, "grad_norm": 0.2836102361102692, "learning_rate": 4.268884140357679e-06, "loss": 0.1844, "step": 8812 }, { "epoch": 0.7035203959447593, "grad_norm": 0.3018335293145041, "learning_rate": 4.266765605910186e-06, "loss": 0.1682, "step": 8813 }, { "epoch": 0.7036002235172029, "grad_norm": 0.28558129411181077, "learning_rate": 4.264647454706715e-06, "loss": 0.1617, "step": 8814 }, { "epoch": 0.7036800510896464, "grad_norm": 0.3006448316262476, "learning_rate": 4.2625296868888685e-06, "loss": 0.1495, "step": 8815 }, { "epoch": 0.7037598786620899, "grad_norm": 0.3041436850077754, "learning_rate": 4.260412302598203e-06, "loss": 0.1611, "step": 8816 }, { "epoch": 0.7038397062345334, "grad_norm": 0.3057734267401533, "learning_rate": 4.258295301976268e-06, "loss": 0.1524, "step": 8817 }, { "epoch": 0.7039195338069769, "grad_norm": 0.2769263346744514, "learning_rate": 4.256178685164568e-06, "loss": 0.1787, "step": 8818 }, { "epoch": 0.7039993613794204, "grad_norm": 0.31694939345608314, "learning_rate": 4.254062452304587e-06, "loss": 0.1637, "step": 8819 }, { "epoch": 0.7040791889518639, "grad_norm": 0.328073734499217, "learning_rate": 4.251946603537795e-06, "loss": 0.1728, "step": 8820 }, { "epoch": 0.7041590165243075, "grad_norm": 0.26995208458398245, "learning_rate": 4.249831139005627e-06, "loss": 0.1524, "step": 8821 }, { "epoch": 0.704238844096751, "grad_norm": 0.2741827683399422, "learning_rate": 4.247716058849491e-06, "loss": 0.1363, "step": 8822 }, { "epoch": 0.7043186716691945, "grad_norm": 0.2638905322115754, "learning_rate": 4.245601363210768e-06, "loss": 0.1441, "step": 8823 }, { "epoch": 0.7043984992416381, "grad_norm": 0.289830494387169, "learning_rate": 4.243487052230825e-06, "loss": 0.1862, "step": 8824 }, { "epoch": 0.7044783268140816, "grad_norm": 0.26241565043483994, "learning_rate": 4.241373126050991e-06, "loss": 0.1413, "step": 8825 }, { "epoch": 0.7045581543865251, "grad_norm": 0.2671977785083658, "learning_rate": 4.239259584812575e-06, "loss": 0.2321, "step": 8826 }, { "epoch": 0.7046379819589687, "grad_norm": 0.2851002077463946, "learning_rate": 4.237146428656858e-06, "loss": 0.1835, "step": 8827 }, { "epoch": 0.7047178095314122, "grad_norm": 0.28276067020052775, "learning_rate": 4.235033657725093e-06, "loss": 0.1784, "step": 8828 }, { "epoch": 0.7047976371038557, "grad_norm": 0.3109008192154429, "learning_rate": 4.232921272158515e-06, "loss": 0.1803, "step": 8829 }, { "epoch": 0.7048774646762992, "grad_norm": 0.32656081729300807, "learning_rate": 4.2308092720983295e-06, "loss": 0.1424, "step": 8830 }, { "epoch": 0.7049572922487427, "grad_norm": 0.3451008586653655, "learning_rate": 4.228697657685713e-06, "loss": 0.2091, "step": 8831 }, { "epoch": 0.7050371198211862, "grad_norm": 0.27482784819565204, "learning_rate": 4.2265864290618174e-06, "loss": 0.2011, "step": 8832 }, { "epoch": 0.7051169473936297, "grad_norm": 0.24833131795712457, "learning_rate": 4.224475586367769e-06, "loss": 0.1957, "step": 8833 }, { "epoch": 0.7051967749660732, "grad_norm": 0.2747274056212698, "learning_rate": 4.222365129744673e-06, "loss": 0.171, "step": 8834 }, { "epoch": 0.7052766025385168, "grad_norm": 0.30784282585351436, "learning_rate": 4.220255059333605e-06, "loss": 0.1435, "step": 8835 }, { "epoch": 0.7053564301109603, "grad_norm": 0.2912669491958978, "learning_rate": 4.218145375275614e-06, "loss": 0.1391, "step": 8836 }, { "epoch": 0.7054362576834039, "grad_norm": 0.3475491128194537, "learning_rate": 4.21603607771172e-06, "loss": 0.1882, "step": 8837 }, { "epoch": 0.7055160852558474, "grad_norm": 0.24549191394834546, "learning_rate": 4.213927166782926e-06, "loss": 0.1251, "step": 8838 }, { "epoch": 0.7055959128282909, "grad_norm": 0.3095045572601103, "learning_rate": 4.211818642630204e-06, "loss": 0.1587, "step": 8839 }, { "epoch": 0.7056757404007344, "grad_norm": 0.3029483992159317, "learning_rate": 4.2097105053945e-06, "loss": 0.122, "step": 8840 }, { "epoch": 0.705755567973178, "grad_norm": 0.2742794905506947, "learning_rate": 4.207602755216733e-06, "loss": 0.1346, "step": 8841 }, { "epoch": 0.7058353955456215, "grad_norm": 0.2586836794057583, "learning_rate": 4.205495392237796e-06, "loss": 0.1829, "step": 8842 }, { "epoch": 0.705915223118065, "grad_norm": 0.29299710893432057, "learning_rate": 4.203388416598565e-06, "loss": 0.1575, "step": 8843 }, { "epoch": 0.7059950506905085, "grad_norm": 0.30510697290290767, "learning_rate": 4.201281828439878e-06, "loss": 0.1431, "step": 8844 }, { "epoch": 0.706074878262952, "grad_norm": 0.27763294390921067, "learning_rate": 4.199175627902553e-06, "loss": 0.1852, "step": 8845 }, { "epoch": 0.7061547058353955, "grad_norm": 0.24292879983424595, "learning_rate": 4.197069815127377e-06, "loss": 0.1743, "step": 8846 }, { "epoch": 0.706234533407839, "grad_norm": 0.30939674597443056, "learning_rate": 4.194964390255123e-06, "loss": 0.1832, "step": 8847 }, { "epoch": 0.7063143609802826, "grad_norm": 0.28440718911932855, "learning_rate": 4.192859353426531e-06, "loss": 0.1596, "step": 8848 }, { "epoch": 0.7063941885527261, "grad_norm": 0.28897037908064316, "learning_rate": 4.190754704782301e-06, "loss": 0.1719, "step": 8849 }, { "epoch": 0.7064740161251696, "grad_norm": 0.28531305309738375, "learning_rate": 4.1886504444631335e-06, "loss": 0.1307, "step": 8850 }, { "epoch": 0.7065538436976132, "grad_norm": 0.2854415999814385, "learning_rate": 4.186546572609682e-06, "loss": 0.1693, "step": 8851 }, { "epoch": 0.7066336712700567, "grad_norm": 0.28370456221207635, "learning_rate": 4.1844430893625945e-06, "loss": 0.1297, "step": 8852 }, { "epoch": 0.7067134988425002, "grad_norm": 0.255396605046615, "learning_rate": 4.182339994862463e-06, "loss": 0.1868, "step": 8853 }, { "epoch": 0.7067933264149437, "grad_norm": 0.3100936780327153, "learning_rate": 4.180237289249884e-06, "loss": 0.1522, "step": 8854 }, { "epoch": 0.7068731539873873, "grad_norm": 0.33801870672656026, "learning_rate": 4.178134972665412e-06, "loss": 0.1376, "step": 8855 }, { "epoch": 0.7069529815598308, "grad_norm": 0.3109442671926193, "learning_rate": 4.176033045249572e-06, "loss": 0.176, "step": 8856 }, { "epoch": 0.7070328091322743, "grad_norm": 0.2577597357842124, "learning_rate": 4.173931507142884e-06, "loss": 0.1176, "step": 8857 }, { "epoch": 0.7071126367047178, "grad_norm": 0.2675327948290439, "learning_rate": 4.171830358485811e-06, "loss": 0.1416, "step": 8858 }, { "epoch": 0.7071924642771613, "grad_norm": 0.30721526110905684, "learning_rate": 4.169729599418817e-06, "loss": 0.1392, "step": 8859 }, { "epoch": 0.7072722918496048, "grad_norm": 0.2434762639989521, "learning_rate": 4.167629230082323e-06, "loss": 0.1693, "step": 8860 }, { "epoch": 0.7073521194220483, "grad_norm": 0.326955431074519, "learning_rate": 4.165529250616741e-06, "loss": 0.143, "step": 8861 }, { "epoch": 0.7074319469944919, "grad_norm": 0.28659840100849315, "learning_rate": 4.163429661162436e-06, "loss": 0.1925, "step": 8862 }, { "epoch": 0.7075117745669354, "grad_norm": 0.2762536282241742, "learning_rate": 4.161330461859756e-06, "loss": 0.1836, "step": 8863 }, { "epoch": 0.707591602139379, "grad_norm": 0.24776281933903246, "learning_rate": 4.159231652849033e-06, "loss": 0.132, "step": 8864 }, { "epoch": 0.7076714297118225, "grad_norm": 0.2761134919169592, "learning_rate": 4.157133234270558e-06, "loss": 0.1611, "step": 8865 }, { "epoch": 0.707751257284266, "grad_norm": 0.28028812590880553, "learning_rate": 4.155035206264606e-06, "loss": 0.1571, "step": 8866 }, { "epoch": 0.7078310848567095, "grad_norm": 0.2941619176681456, "learning_rate": 4.152937568971415e-06, "loss": 0.1221, "step": 8867 }, { "epoch": 0.707910912429153, "grad_norm": 0.37962091659463687, "learning_rate": 4.150840322531211e-06, "loss": 0.2017, "step": 8868 }, { "epoch": 0.7079907400015966, "grad_norm": 0.2785982921444291, "learning_rate": 4.148743467084184e-06, "loss": 0.1541, "step": 8869 }, { "epoch": 0.7080705675740401, "grad_norm": 0.31417579464249534, "learning_rate": 4.1466470027705e-06, "loss": 0.1592, "step": 8870 }, { "epoch": 0.7081503951464836, "grad_norm": 0.35404559871548336, "learning_rate": 4.144550929730301e-06, "loss": 0.1619, "step": 8871 }, { "epoch": 0.7082302227189271, "grad_norm": 0.30077630225108476, "learning_rate": 4.142455248103695e-06, "loss": 0.1188, "step": 8872 }, { "epoch": 0.7083100502913706, "grad_norm": 0.27835614929365055, "learning_rate": 4.1403599580307795e-06, "loss": 0.183, "step": 8873 }, { "epoch": 0.7083898778638141, "grad_norm": 0.29924703686774734, "learning_rate": 4.138265059651612e-06, "loss": 0.15, "step": 8874 }, { "epoch": 0.7084697054362576, "grad_norm": 0.263348308010713, "learning_rate": 4.136170553106227e-06, "loss": 0.1642, "step": 8875 }, { "epoch": 0.7085495330087012, "grad_norm": 0.2971778033070193, "learning_rate": 4.134076438534631e-06, "loss": 0.1246, "step": 8876 }, { "epoch": 0.7086293605811447, "grad_norm": 0.27069517803827514, "learning_rate": 4.131982716076816e-06, "loss": 0.1673, "step": 8877 }, { "epoch": 0.7087091881535883, "grad_norm": 0.2877445913467807, "learning_rate": 4.129889385872733e-06, "loss": 0.1705, "step": 8878 }, { "epoch": 0.7087890157260318, "grad_norm": 0.32880026968802845, "learning_rate": 4.127796448062315e-06, "loss": 0.1501, "step": 8879 }, { "epoch": 0.7088688432984753, "grad_norm": 0.31613830247608393, "learning_rate": 4.125703902785464e-06, "loss": 0.1941, "step": 8880 }, { "epoch": 0.7089486708709188, "grad_norm": 0.29325006079134786, "learning_rate": 4.123611750182058e-06, "loss": 0.1375, "step": 8881 }, { "epoch": 0.7090284984433624, "grad_norm": 0.260011980634021, "learning_rate": 4.1215199903919545e-06, "loss": 0.1354, "step": 8882 }, { "epoch": 0.7091083260158059, "grad_norm": 0.34379623435226064, "learning_rate": 4.119428623554975e-06, "loss": 0.1983, "step": 8883 }, { "epoch": 0.7091881535882494, "grad_norm": 0.28421048507871927, "learning_rate": 4.117337649810922e-06, "loss": 0.1341, "step": 8884 }, { "epoch": 0.7092679811606929, "grad_norm": 0.2652517613973444, "learning_rate": 4.115247069299565e-06, "loss": 0.1476, "step": 8885 }, { "epoch": 0.7093478087331364, "grad_norm": 0.3127870755997861, "learning_rate": 4.113156882160651e-06, "loss": 0.1232, "step": 8886 }, { "epoch": 0.7094276363055799, "grad_norm": 0.34753602944001194, "learning_rate": 4.1110670885339056e-06, "loss": 0.1533, "step": 8887 }, { "epoch": 0.7095074638780234, "grad_norm": 0.2886943586260139, "learning_rate": 4.108977688559019e-06, "loss": 0.1725, "step": 8888 }, { "epoch": 0.709587291450467, "grad_norm": 0.2736965834484397, "learning_rate": 4.106888682375662e-06, "loss": 0.1448, "step": 8889 }, { "epoch": 0.7096671190229105, "grad_norm": 0.3196795360376458, "learning_rate": 4.1048000701234725e-06, "loss": 0.1566, "step": 8890 }, { "epoch": 0.7097469465953541, "grad_norm": 0.2835521017120251, "learning_rate": 4.1027118519420705e-06, "loss": 0.1757, "step": 8891 }, { "epoch": 0.7098267741677976, "grad_norm": 0.30504693903938496, "learning_rate": 4.100624027971047e-06, "loss": 0.1553, "step": 8892 }, { "epoch": 0.7099066017402411, "grad_norm": 0.2880004162676901, "learning_rate": 4.098536598349954e-06, "loss": 0.1563, "step": 8893 }, { "epoch": 0.7099864293126846, "grad_norm": 0.287286204920285, "learning_rate": 4.096449563218339e-06, "loss": 0.1456, "step": 8894 }, { "epoch": 0.7100662568851281, "grad_norm": 0.2767282027576055, "learning_rate": 4.094362922715703e-06, "loss": 0.1374, "step": 8895 }, { "epoch": 0.7101460844575717, "grad_norm": 0.2592596872974979, "learning_rate": 4.092276676981543e-06, "loss": 0.151, "step": 8896 }, { "epoch": 0.7102259120300152, "grad_norm": 0.3082246114058833, "learning_rate": 4.090190826155299e-06, "loss": 0.154, "step": 8897 }, { "epoch": 0.7103057396024587, "grad_norm": 0.3343142822827188, "learning_rate": 4.088105370376415e-06, "loss": 0.1682, "step": 8898 }, { "epoch": 0.7103855671749022, "grad_norm": 0.30404907430765776, "learning_rate": 4.086020309784292e-06, "loss": 0.1728, "step": 8899 }, { "epoch": 0.7104653947473457, "grad_norm": 0.2927245348962496, "learning_rate": 4.083935644518303e-06, "loss": 0.1484, "step": 8900 }, { "epoch": 0.7105452223197892, "grad_norm": 0.3176392392982451, "learning_rate": 4.08185137471781e-06, "loss": 0.1842, "step": 8901 }, { "epoch": 0.7106250498922327, "grad_norm": 0.26126518634634, "learning_rate": 4.079767500522126e-06, "loss": 0.1641, "step": 8902 }, { "epoch": 0.7107048774646763, "grad_norm": 0.28525654414751705, "learning_rate": 4.077684022070559e-06, "loss": 0.1783, "step": 8903 }, { "epoch": 0.7107847050371198, "grad_norm": 0.23723306281170745, "learning_rate": 4.075600939502374e-06, "loss": 0.1534, "step": 8904 }, { "epoch": 0.7108645326095634, "grad_norm": 0.364413394647593, "learning_rate": 4.073518252956829e-06, "loss": 0.2132, "step": 8905 }, { "epoch": 0.7109443601820069, "grad_norm": 0.28469860049076845, "learning_rate": 4.071435962573128e-06, "loss": 0.1429, "step": 8906 }, { "epoch": 0.7110241877544504, "grad_norm": 0.2904700153002066, "learning_rate": 4.069354068490475e-06, "loss": 0.1488, "step": 8907 }, { "epoch": 0.7111040153268939, "grad_norm": 0.27048476598222804, "learning_rate": 4.06727257084803e-06, "loss": 0.1532, "step": 8908 }, { "epoch": 0.7111838428993374, "grad_norm": 0.27896545588721927, "learning_rate": 4.065191469784937e-06, "loss": 0.1548, "step": 8909 }, { "epoch": 0.711263670471781, "grad_norm": 0.36527988513718046, "learning_rate": 4.063110765440308e-06, "loss": 0.1801, "step": 8910 }, { "epoch": 0.7113434980442245, "grad_norm": 0.2439355242460713, "learning_rate": 4.061030457953226e-06, "loss": 0.1773, "step": 8911 }, { "epoch": 0.711423325616668, "grad_norm": 0.3079157432502349, "learning_rate": 4.058950547462758e-06, "loss": 0.1788, "step": 8912 }, { "epoch": 0.7115031531891115, "grad_norm": 0.27701118400779545, "learning_rate": 4.056871034107934e-06, "loss": 0.1696, "step": 8913 }, { "epoch": 0.711582980761555, "grad_norm": 0.27368834860361774, "learning_rate": 4.054791918027763e-06, "loss": 0.1322, "step": 8914 }, { "epoch": 0.7116628083339985, "grad_norm": 0.3143763053913289, "learning_rate": 4.0527131993612234e-06, "loss": 0.1756, "step": 8915 }, { "epoch": 0.711742635906442, "grad_norm": 0.27042341647505147, "learning_rate": 4.050634878247268e-06, "loss": 0.1806, "step": 8916 }, { "epoch": 0.7118224634788856, "grad_norm": 0.2709776000106365, "learning_rate": 4.0485569548248305e-06, "loss": 0.1332, "step": 8917 }, { "epoch": 0.7119022910513292, "grad_norm": 0.2768821016168194, "learning_rate": 4.046479429232808e-06, "loss": 0.1972, "step": 8918 }, { "epoch": 0.7119821186237727, "grad_norm": 0.31189040990680605, "learning_rate": 4.044402301610074e-06, "loss": 0.1334, "step": 8919 }, { "epoch": 0.7120619461962162, "grad_norm": 0.34978348045807617, "learning_rate": 4.042325572095474e-06, "loss": 0.1864, "step": 8920 }, { "epoch": 0.7121417737686597, "grad_norm": 0.26911320481838474, "learning_rate": 4.040249240827838e-06, "loss": 0.1393, "step": 8921 }, { "epoch": 0.7122216013411032, "grad_norm": 0.3417251211201166, "learning_rate": 4.038173307945954e-06, "loss": 0.1391, "step": 8922 }, { "epoch": 0.7123014289135468, "grad_norm": 0.321849786307314, "learning_rate": 4.03609777358859e-06, "loss": 0.1731, "step": 8923 }, { "epoch": 0.7123812564859903, "grad_norm": 0.2829913119143864, "learning_rate": 4.034022637894489e-06, "loss": 0.1447, "step": 8924 }, { "epoch": 0.7124610840584338, "grad_norm": 0.2967941207788305, "learning_rate": 4.031947901002361e-06, "loss": 0.1701, "step": 8925 }, { "epoch": 0.7125409116308773, "grad_norm": 0.32737749402718014, "learning_rate": 4.029873563050901e-06, "loss": 0.1849, "step": 8926 }, { "epoch": 0.7126207392033208, "grad_norm": 0.23038400612662127, "learning_rate": 4.027799624178767e-06, "loss": 0.1365, "step": 8927 }, { "epoch": 0.7127005667757643, "grad_norm": 0.302501624054635, "learning_rate": 4.025726084524594e-06, "loss": 0.1789, "step": 8928 }, { "epoch": 0.7127803943482078, "grad_norm": 0.28301376418496577, "learning_rate": 4.023652944226989e-06, "loss": 0.1354, "step": 8929 }, { "epoch": 0.7128602219206513, "grad_norm": 0.25189275136027556, "learning_rate": 4.021580203424531e-06, "loss": 0.1373, "step": 8930 }, { "epoch": 0.7129400494930949, "grad_norm": 0.26743188225863507, "learning_rate": 4.01950786225578e-06, "loss": 0.1986, "step": 8931 }, { "epoch": 0.7130198770655385, "grad_norm": 0.29795561462030895, "learning_rate": 4.017435920859262e-06, "loss": 0.1396, "step": 8932 }, { "epoch": 0.713099704637982, "grad_norm": 0.2715874617479516, "learning_rate": 4.0153643793734765e-06, "loss": 0.1621, "step": 8933 }, { "epoch": 0.7131795322104255, "grad_norm": 0.2914402007632379, "learning_rate": 4.013293237936895e-06, "loss": 0.1454, "step": 8934 }, { "epoch": 0.713259359782869, "grad_norm": 0.26148504705118614, "learning_rate": 4.011222496687972e-06, "loss": 0.1958, "step": 8935 }, { "epoch": 0.7133391873553125, "grad_norm": 0.27765238636709016, "learning_rate": 4.009152155765125e-06, "loss": 0.1693, "step": 8936 }, { "epoch": 0.7134190149277561, "grad_norm": 0.30765572888473214, "learning_rate": 4.007082215306748e-06, "loss": 0.1653, "step": 8937 }, { "epoch": 0.7134988425001996, "grad_norm": 0.25135044045314264, "learning_rate": 4.005012675451209e-06, "loss": 0.1512, "step": 8938 }, { "epoch": 0.7135786700726431, "grad_norm": 0.2998022963126521, "learning_rate": 4.0029435363368445e-06, "loss": 0.1718, "step": 8939 }, { "epoch": 0.7136584976450866, "grad_norm": 0.2528503220554115, "learning_rate": 4.000874798101978e-06, "loss": 0.1514, "step": 8940 }, { "epoch": 0.7137383252175301, "grad_norm": 0.25788682850788597, "learning_rate": 3.998806460884883e-06, "loss": 0.1792, "step": 8941 }, { "epoch": 0.7138181527899736, "grad_norm": 0.31638490189153173, "learning_rate": 3.996738524823831e-06, "loss": 0.1768, "step": 8942 }, { "epoch": 0.7138979803624171, "grad_norm": 0.2960351027903719, "learning_rate": 3.994670990057048e-06, "loss": 0.2253, "step": 8943 }, { "epoch": 0.7139778079348607, "grad_norm": 0.29270977922343966, "learning_rate": 3.99260385672275e-06, "loss": 0.1632, "step": 8944 }, { "epoch": 0.7140576355073043, "grad_norm": 0.3398189191568772, "learning_rate": 3.990537124959106e-06, "loss": 0.1512, "step": 8945 }, { "epoch": 0.7141374630797478, "grad_norm": 0.3507948648956189, "learning_rate": 3.988470794904271e-06, "loss": 0.1435, "step": 8946 }, { "epoch": 0.7142172906521913, "grad_norm": 0.3283974093112878, "learning_rate": 3.986404866696377e-06, "loss": 0.2094, "step": 8947 }, { "epoch": 0.7142971182246348, "grad_norm": 0.2772470966928844, "learning_rate": 3.984339340473515e-06, "loss": 0.1114, "step": 8948 }, { "epoch": 0.7143769457970783, "grad_norm": 0.31387978276170064, "learning_rate": 3.982274216373769e-06, "loss": 0.1567, "step": 8949 }, { "epoch": 0.7144567733695218, "grad_norm": 0.23919104192986623, "learning_rate": 3.98020949453517e-06, "loss": 0.181, "step": 8950 }, { "epoch": 0.7145366009419654, "grad_norm": 0.309385237912761, "learning_rate": 3.978145175095747e-06, "loss": 0.134, "step": 8951 }, { "epoch": 0.7146164285144089, "grad_norm": 0.29774655708371134, "learning_rate": 3.976081258193487e-06, "loss": 0.1881, "step": 8952 }, { "epoch": 0.7146962560868524, "grad_norm": 0.2771768454443196, "learning_rate": 3.974017743966357e-06, "loss": 0.2072, "step": 8953 }, { "epoch": 0.7147760836592959, "grad_norm": 0.2706833438774198, "learning_rate": 3.971954632552293e-06, "loss": 0.1554, "step": 8954 }, { "epoch": 0.7148559112317394, "grad_norm": 0.27968223188079844, "learning_rate": 3.969891924089203e-06, "loss": 0.1719, "step": 8955 }, { "epoch": 0.7149357388041829, "grad_norm": 0.2750430782694153, "learning_rate": 3.967829618714977e-06, "loss": 0.1744, "step": 8956 }, { "epoch": 0.7150155663766264, "grad_norm": 0.26932561709022335, "learning_rate": 3.96576771656747e-06, "loss": 0.1723, "step": 8957 }, { "epoch": 0.71509539394907, "grad_norm": 0.28683453581508894, "learning_rate": 3.963706217784511e-06, "loss": 0.1618, "step": 8958 }, { "epoch": 0.7151752215215136, "grad_norm": 0.26882297495281077, "learning_rate": 3.961645122503902e-06, "loss": 0.1487, "step": 8959 }, { "epoch": 0.7152550490939571, "grad_norm": 0.32214530177807504, "learning_rate": 3.959584430863418e-06, "loss": 0.1607, "step": 8960 }, { "epoch": 0.7153348766664006, "grad_norm": 0.2893995001730824, "learning_rate": 3.957524143000814e-06, "loss": 0.1455, "step": 8961 }, { "epoch": 0.7154147042388441, "grad_norm": 0.3345959568603167, "learning_rate": 3.9554642590538075e-06, "loss": 0.1898, "step": 8962 }, { "epoch": 0.7154945318112876, "grad_norm": 0.3029381849926694, "learning_rate": 3.953404779160095e-06, "loss": 0.1424, "step": 8963 }, { "epoch": 0.7155743593837312, "grad_norm": 0.25188547048129306, "learning_rate": 3.95134570345734e-06, "loss": 0.1467, "step": 8964 }, { "epoch": 0.7156541869561747, "grad_norm": 0.30222221763628887, "learning_rate": 3.949287032083192e-06, "loss": 0.1624, "step": 8965 }, { "epoch": 0.7157340145286182, "grad_norm": 0.24316433072753835, "learning_rate": 3.94722876517526e-06, "loss": 0.1349, "step": 8966 }, { "epoch": 0.7158138421010617, "grad_norm": 0.261989057727461, "learning_rate": 3.9451709028711315e-06, "loss": 0.1913, "step": 8967 }, { "epoch": 0.7158936696735052, "grad_norm": 0.2669847428681312, "learning_rate": 3.943113445308367e-06, "loss": 0.13, "step": 8968 }, { "epoch": 0.7159734972459487, "grad_norm": 0.2854042952310923, "learning_rate": 3.941056392624496e-06, "loss": 0.1363, "step": 8969 }, { "epoch": 0.7160533248183922, "grad_norm": 0.31498573665077756, "learning_rate": 3.93899974495703e-06, "loss": 0.1474, "step": 8970 }, { "epoch": 0.7161331523908357, "grad_norm": 0.28863822091441715, "learning_rate": 3.936943502443446e-06, "loss": 0.1501, "step": 8971 }, { "epoch": 0.7162129799632793, "grad_norm": 0.24727326946152464, "learning_rate": 3.934887665221194e-06, "loss": 0.1474, "step": 8972 }, { "epoch": 0.7162928075357229, "grad_norm": 0.28401157869005306, "learning_rate": 3.932832233427697e-06, "loss": 0.1962, "step": 8973 }, { "epoch": 0.7163726351081664, "grad_norm": 0.26832420025910775, "learning_rate": 3.930777207200357e-06, "loss": 0.1612, "step": 8974 }, { "epoch": 0.7164524626806099, "grad_norm": 0.3205099095082605, "learning_rate": 3.928722586676544e-06, "loss": 0.1384, "step": 8975 }, { "epoch": 0.7165322902530534, "grad_norm": 0.3146927377330736, "learning_rate": 3.926668371993597e-06, "loss": 0.2047, "step": 8976 }, { "epoch": 0.7166121178254969, "grad_norm": 0.2944183690150558, "learning_rate": 3.924614563288837e-06, "loss": 0.1506, "step": 8977 }, { "epoch": 0.7166919453979405, "grad_norm": 0.35951667153136385, "learning_rate": 3.922561160699545e-06, "loss": 0.1903, "step": 8978 }, { "epoch": 0.716771772970384, "grad_norm": 0.31706257069707233, "learning_rate": 3.920508164362993e-06, "loss": 0.1692, "step": 8979 }, { "epoch": 0.7168516005428275, "grad_norm": 0.32555542534730925, "learning_rate": 3.918455574416411e-06, "loss": 0.1704, "step": 8980 }, { "epoch": 0.716931428115271, "grad_norm": 0.2740163291741814, "learning_rate": 3.916403390997007e-06, "loss": 0.2265, "step": 8981 }, { "epoch": 0.7170112556877145, "grad_norm": 0.28358978535693125, "learning_rate": 3.91435161424196e-06, "loss": 0.1326, "step": 8982 }, { "epoch": 0.717091083260158, "grad_norm": 0.28466503245945507, "learning_rate": 3.912300244288421e-06, "loss": 0.1547, "step": 8983 }, { "epoch": 0.7171709108326015, "grad_norm": 0.274612267813807, "learning_rate": 3.910249281273526e-06, "loss": 0.1751, "step": 8984 }, { "epoch": 0.717250738405045, "grad_norm": 0.262601533029027, "learning_rate": 3.90819872533436e-06, "loss": 0.1418, "step": 8985 }, { "epoch": 0.7173305659774887, "grad_norm": 0.2714633676636545, "learning_rate": 3.906148576608005e-06, "loss": 0.143, "step": 8986 }, { "epoch": 0.7174103935499322, "grad_norm": 0.24851981740277787, "learning_rate": 3.904098835231498e-06, "loss": 0.1597, "step": 8987 }, { "epoch": 0.7174902211223757, "grad_norm": 0.2946176636680974, "learning_rate": 3.902049501341867e-06, "loss": 0.1546, "step": 8988 }, { "epoch": 0.7175700486948192, "grad_norm": 0.3472086349911608, "learning_rate": 3.9000005750760915e-06, "loss": 0.1728, "step": 8989 }, { "epoch": 0.7176498762672627, "grad_norm": 0.28257472841453696, "learning_rate": 3.8979520565711335e-06, "loss": 0.1652, "step": 8990 }, { "epoch": 0.7177297038397062, "grad_norm": 0.27388490902950213, "learning_rate": 3.895903945963936e-06, "loss": 0.127, "step": 8991 }, { "epoch": 0.7178095314121498, "grad_norm": 0.32395302508468105, "learning_rate": 3.8938562433913986e-06, "loss": 0.1484, "step": 8992 }, { "epoch": 0.7178893589845933, "grad_norm": 0.30338039047668763, "learning_rate": 3.891808948990414e-06, "loss": 0.1189, "step": 8993 }, { "epoch": 0.7179691865570368, "grad_norm": 0.3274716924709214, "learning_rate": 3.889762062897821e-06, "loss": 0.1626, "step": 8994 }, { "epoch": 0.7180490141294803, "grad_norm": 0.2725323103321504, "learning_rate": 3.8877155852504585e-06, "loss": 0.1403, "step": 8995 }, { "epoch": 0.7181288417019238, "grad_norm": 0.28534438594112926, "learning_rate": 3.88566951618512e-06, "loss": 0.1531, "step": 8996 }, { "epoch": 0.7182086692743673, "grad_norm": 0.2871594637164953, "learning_rate": 3.883623855838576e-06, "loss": 0.1407, "step": 8997 }, { "epoch": 0.7182884968468108, "grad_norm": 0.27621742953483247, "learning_rate": 3.881578604347573e-06, "loss": 0.1459, "step": 8998 }, { "epoch": 0.7183683244192544, "grad_norm": 0.26584319359371195, "learning_rate": 3.879533761848823e-06, "loss": 0.1588, "step": 8999 }, { "epoch": 0.718448151991698, "grad_norm": 0.28284698403067476, "learning_rate": 3.877489328479025e-06, "loss": 0.1863, "step": 9000 }, { "epoch": 0.7185279795641415, "grad_norm": 0.2760050481693491, "learning_rate": 3.875445304374834e-06, "loss": 0.1801, "step": 9001 }, { "epoch": 0.718607807136585, "grad_norm": 0.29287547418906357, "learning_rate": 3.873401689672889e-06, "loss": 0.1938, "step": 9002 }, { "epoch": 0.7186876347090285, "grad_norm": 0.2959829020760575, "learning_rate": 3.87135848450979e-06, "loss": 0.1845, "step": 9003 }, { "epoch": 0.718767462281472, "grad_norm": 0.3008095459622807, "learning_rate": 3.869315689022127e-06, "loss": 0.1815, "step": 9004 }, { "epoch": 0.7188472898539155, "grad_norm": 0.28419200298366687, "learning_rate": 3.867273303346448e-06, "loss": 0.1233, "step": 9005 }, { "epoch": 0.7189271174263591, "grad_norm": 0.41583016853852267, "learning_rate": 3.86523132761928e-06, "loss": 0.1581, "step": 9006 }, { "epoch": 0.7190069449988026, "grad_norm": 0.24590388161054613, "learning_rate": 3.863189761977119e-06, "loss": 0.2123, "step": 9007 }, { "epoch": 0.7190867725712461, "grad_norm": 0.2572729365946525, "learning_rate": 3.8611486065564326e-06, "loss": 0.1663, "step": 9008 }, { "epoch": 0.7191666001436896, "grad_norm": 0.32224481638198077, "learning_rate": 3.8591078614936725e-06, "loss": 0.1493, "step": 9009 }, { "epoch": 0.7192464277161331, "grad_norm": 0.3302717168364796, "learning_rate": 3.857067526925249e-06, "loss": 0.1517, "step": 9010 }, { "epoch": 0.7193262552885766, "grad_norm": 0.2902644169702484, "learning_rate": 3.855027602987551e-06, "loss": 0.2119, "step": 9011 }, { "epoch": 0.7194060828610201, "grad_norm": 0.3216403575668085, "learning_rate": 3.85298808981694e-06, "loss": 0.1576, "step": 9012 }, { "epoch": 0.7194859104334638, "grad_norm": 0.2987902418877571, "learning_rate": 3.850948987549745e-06, "loss": 0.1867, "step": 9013 }, { "epoch": 0.7195657380059073, "grad_norm": 0.2402953950608372, "learning_rate": 3.848910296322279e-06, "loss": 0.1446, "step": 9014 }, { "epoch": 0.7196455655783508, "grad_norm": 0.36943137473335586, "learning_rate": 3.846872016270818e-06, "loss": 0.2012, "step": 9015 }, { "epoch": 0.7197253931507943, "grad_norm": 0.305061900393281, "learning_rate": 3.844834147531612e-06, "loss": 0.1705, "step": 9016 }, { "epoch": 0.7198052207232378, "grad_norm": 0.28271335562945077, "learning_rate": 3.842796690240881e-06, "loss": 0.2096, "step": 9017 }, { "epoch": 0.7198850482956813, "grad_norm": 0.27055200762949166, "learning_rate": 3.840759644534828e-06, "loss": 0.1059, "step": 9018 }, { "epoch": 0.7199648758681249, "grad_norm": 0.2648122362332328, "learning_rate": 3.838723010549619e-06, "loss": 0.1441, "step": 9019 }, { "epoch": 0.7200447034405684, "grad_norm": 0.27131745800475027, "learning_rate": 3.836686788421392e-06, "loss": 0.16, "step": 9020 }, { "epoch": 0.7201245310130119, "grad_norm": 0.2853787278975384, "learning_rate": 3.834650978286264e-06, "loss": 0.1545, "step": 9021 }, { "epoch": 0.7202043585854554, "grad_norm": 0.25496857980450344, "learning_rate": 3.832615580280316e-06, "loss": 0.1567, "step": 9022 }, { "epoch": 0.7202841861578989, "grad_norm": 0.2655629863704169, "learning_rate": 3.8305805945396116e-06, "loss": 0.1556, "step": 9023 }, { "epoch": 0.7203640137303424, "grad_norm": 0.3207987294712046, "learning_rate": 3.82854602120018e-06, "loss": 0.1999, "step": 9024 }, { "epoch": 0.7204438413027859, "grad_norm": 0.34407070026202197, "learning_rate": 3.826511860398023e-06, "loss": 0.1417, "step": 9025 }, { "epoch": 0.7205236688752295, "grad_norm": 0.250839426524519, "learning_rate": 3.824478112269119e-06, "loss": 0.1292, "step": 9026 }, { "epoch": 0.7206034964476731, "grad_norm": 0.2855879415883588, "learning_rate": 3.8224447769494086e-06, "loss": 0.1696, "step": 9027 }, { "epoch": 0.7206833240201166, "grad_norm": 0.2953241267374894, "learning_rate": 3.820411854574826e-06, "loss": 0.1547, "step": 9028 }, { "epoch": 0.7207631515925601, "grad_norm": 0.2705425130213776, "learning_rate": 3.818379345281249e-06, "loss": 0.1843, "step": 9029 }, { "epoch": 0.7208429791650036, "grad_norm": 0.3108831047542925, "learning_rate": 3.816347249204552e-06, "loss": 0.1872, "step": 9030 }, { "epoch": 0.7209228067374471, "grad_norm": 0.27213219990286946, "learning_rate": 3.8143155664805662e-06, "loss": 0.1589, "step": 9031 }, { "epoch": 0.7210026343098906, "grad_norm": 0.25082905390025756, "learning_rate": 3.8122842972451136e-06, "loss": 0.1615, "step": 9032 }, { "epoch": 0.7210824618823342, "grad_norm": 0.33389821030883343, "learning_rate": 3.810253441633964e-06, "loss": 0.1568, "step": 9033 }, { "epoch": 0.7211622894547777, "grad_norm": 0.2673754932562741, "learning_rate": 3.8082229997828735e-06, "loss": 0.1068, "step": 9034 }, { "epoch": 0.7212421170272212, "grad_norm": 0.27114626070427983, "learning_rate": 3.8061929718275746e-06, "loss": 0.1243, "step": 9035 }, { "epoch": 0.7213219445996647, "grad_norm": 0.27819522484790554, "learning_rate": 3.804163357903764e-06, "loss": 0.1086, "step": 9036 }, { "epoch": 0.7214017721721082, "grad_norm": 0.27532529777425196, "learning_rate": 3.802134158147114e-06, "loss": 0.1747, "step": 9037 }, { "epoch": 0.7214815997445517, "grad_norm": 0.2960169872903615, "learning_rate": 3.8001053726932645e-06, "loss": 0.1736, "step": 9038 }, { "epoch": 0.7215614273169952, "grad_norm": 0.25137440510272113, "learning_rate": 3.798077001677839e-06, "loss": 0.1471, "step": 9039 }, { "epoch": 0.7216412548894389, "grad_norm": 0.3260715569890315, "learning_rate": 3.7960490452364184e-06, "loss": 0.1454, "step": 9040 }, { "epoch": 0.7217210824618824, "grad_norm": 0.2798669724398544, "learning_rate": 3.7940215035045748e-06, "loss": 0.1479, "step": 9041 }, { "epoch": 0.7218009100343259, "grad_norm": 0.32500499069884065, "learning_rate": 3.7919943766178303e-06, "loss": 0.1703, "step": 9042 }, { "epoch": 0.7218807376067694, "grad_norm": 0.30771929897825817, "learning_rate": 3.78996766471169e-06, "loss": 0.1651, "step": 9043 }, { "epoch": 0.7219605651792129, "grad_norm": 0.3155799488923502, "learning_rate": 3.787941367921639e-06, "loss": 0.1409, "step": 9044 }, { "epoch": 0.7220403927516564, "grad_norm": 0.3230400620629357, "learning_rate": 3.785915486383125e-06, "loss": 0.1859, "step": 9045 }, { "epoch": 0.7221202203241, "grad_norm": 0.3137741521469, "learning_rate": 3.7838900202315675e-06, "loss": 0.1687, "step": 9046 }, { "epoch": 0.7222000478965435, "grad_norm": 0.3435969678865559, "learning_rate": 3.7818649696023603e-06, "loss": 0.1954, "step": 9047 }, { "epoch": 0.722279875468987, "grad_norm": 0.27056962526088274, "learning_rate": 3.779840334630874e-06, "loss": 0.1334, "step": 9048 }, { "epoch": 0.7223597030414305, "grad_norm": 0.3511697319539514, "learning_rate": 3.7778161154524475e-06, "loss": 0.1555, "step": 9049 }, { "epoch": 0.722439530613874, "grad_norm": 0.2749809773328827, "learning_rate": 3.7757923122023888e-06, "loss": 0.1669, "step": 9050 }, { "epoch": 0.7225193581863175, "grad_norm": 0.3153586603721763, "learning_rate": 3.7737689250159827e-06, "loss": 0.1567, "step": 9051 }, { "epoch": 0.722599185758761, "grad_norm": 0.262101458823769, "learning_rate": 3.7717459540284797e-06, "loss": 0.1833, "step": 9052 }, { "epoch": 0.7226790133312045, "grad_norm": 0.25412162680779193, "learning_rate": 3.7697233993751157e-06, "loss": 0.1587, "step": 9053 }, { "epoch": 0.7227588409036482, "grad_norm": 0.28604968137796155, "learning_rate": 3.767701261191087e-06, "loss": 0.2144, "step": 9054 }, { "epoch": 0.7228386684760917, "grad_norm": 0.29464635700550196, "learning_rate": 3.765679539611565e-06, "loss": 0.1605, "step": 9055 }, { "epoch": 0.7229184960485352, "grad_norm": 0.2863023621581442, "learning_rate": 3.763658234771693e-06, "loss": 0.1472, "step": 9056 }, { "epoch": 0.7229983236209787, "grad_norm": 0.2699269928695442, "learning_rate": 3.7616373468065857e-06, "loss": 0.2097, "step": 9057 }, { "epoch": 0.7230781511934222, "grad_norm": 0.2780485901581343, "learning_rate": 3.7596168758513374e-06, "loss": 0.1811, "step": 9058 }, { "epoch": 0.7231579787658657, "grad_norm": 0.27824724685808166, "learning_rate": 3.7575968220410052e-06, "loss": 0.1613, "step": 9059 }, { "epoch": 0.7232378063383093, "grad_norm": 0.26671391805674166, "learning_rate": 3.7555771855106218e-06, "loss": 0.1562, "step": 9060 }, { "epoch": 0.7233176339107528, "grad_norm": 0.2668258328314369, "learning_rate": 3.753557966395188e-06, "loss": 0.1572, "step": 9061 }, { "epoch": 0.7233974614831963, "grad_norm": 0.24897669866567404, "learning_rate": 3.751539164829687e-06, "loss": 0.1321, "step": 9062 }, { "epoch": 0.7234772890556398, "grad_norm": 0.2615358829447706, "learning_rate": 3.7495207809490698e-06, "loss": 0.1469, "step": 9063 }, { "epoch": 0.7235571166280833, "grad_norm": 0.29099204599153233, "learning_rate": 3.7475028148882453e-06, "loss": 0.1572, "step": 9064 }, { "epoch": 0.7236369442005268, "grad_norm": 0.28399984380499715, "learning_rate": 3.745485266782116e-06, "loss": 0.1454, "step": 9065 }, { "epoch": 0.7237167717729703, "grad_norm": 0.29284377093597935, "learning_rate": 3.7434681367655436e-06, "loss": 0.2115, "step": 9066 }, { "epoch": 0.723796599345414, "grad_norm": 0.26394061398626356, "learning_rate": 3.741451424973368e-06, "loss": 0.1562, "step": 9067 }, { "epoch": 0.7238764269178575, "grad_norm": 0.25863306938474284, "learning_rate": 3.7394351315403975e-06, "loss": 0.1419, "step": 9068 }, { "epoch": 0.723956254490301, "grad_norm": 0.24526477206903402, "learning_rate": 3.7374192566014133e-06, "loss": 0.1822, "step": 9069 }, { "epoch": 0.7240360820627445, "grad_norm": 0.32288619393295703, "learning_rate": 3.7354038002911643e-06, "loss": 0.1655, "step": 9070 }, { "epoch": 0.724115909635188, "grad_norm": 0.27470213899338136, "learning_rate": 3.7333887627443834e-06, "loss": 0.1696, "step": 9071 }, { "epoch": 0.7241957372076315, "grad_norm": 0.2751241255449201, "learning_rate": 3.7313741440957674e-06, "loss": 0.1586, "step": 9072 }, { "epoch": 0.724275564780075, "grad_norm": 0.3040352148606932, "learning_rate": 3.7293599444799757e-06, "loss": 0.1273, "step": 9073 }, { "epoch": 0.7243553923525186, "grad_norm": 0.3307152036506411, "learning_rate": 3.7273461640316587e-06, "loss": 0.1716, "step": 9074 }, { "epoch": 0.7244352199249621, "grad_norm": 0.28491508393610115, "learning_rate": 3.725332802885424e-06, "loss": 0.1655, "step": 9075 }, { "epoch": 0.7245150474974056, "grad_norm": 0.3070828927238599, "learning_rate": 3.723319861175867e-06, "loss": 0.1239, "step": 9076 }, { "epoch": 0.7245948750698491, "grad_norm": 0.2925511692145809, "learning_rate": 3.721307339037531e-06, "loss": 0.1584, "step": 9077 }, { "epoch": 0.7246747026422926, "grad_norm": 0.2425805005841674, "learning_rate": 3.719295236604955e-06, "loss": 0.1499, "step": 9078 }, { "epoch": 0.7247545302147361, "grad_norm": 0.29756307054478925, "learning_rate": 3.717283554012637e-06, "loss": 0.146, "step": 9079 }, { "epoch": 0.7248343577871796, "grad_norm": 0.32166994568556695, "learning_rate": 3.71527229139505e-06, "loss": 0.1979, "step": 9080 }, { "epoch": 0.7249141853596233, "grad_norm": 0.319757362671079, "learning_rate": 3.7132614488866394e-06, "loss": 0.1744, "step": 9081 }, { "epoch": 0.7249940129320668, "grad_norm": 0.3124940594073539, "learning_rate": 3.7112510266218183e-06, "loss": 0.1704, "step": 9082 }, { "epoch": 0.7250738405045103, "grad_norm": 0.32972330369847064, "learning_rate": 3.709241024734982e-06, "loss": 0.1619, "step": 9083 }, { "epoch": 0.7251536680769538, "grad_norm": 0.23486348412150707, "learning_rate": 3.707231443360485e-06, "loss": 0.1299, "step": 9084 }, { "epoch": 0.7252334956493973, "grad_norm": 0.3053603971423194, "learning_rate": 3.705222282632669e-06, "loss": 0.1403, "step": 9085 }, { "epoch": 0.7253133232218408, "grad_norm": 0.25829547724082375, "learning_rate": 3.7032135426858296e-06, "loss": 0.1483, "step": 9086 }, { "epoch": 0.7253931507942843, "grad_norm": 0.2692115430221108, "learning_rate": 3.7012052236542417e-06, "loss": 0.1666, "step": 9087 }, { "epoch": 0.7254729783667279, "grad_norm": 0.3039771637498484, "learning_rate": 3.6991973256721613e-06, "loss": 0.1769, "step": 9088 }, { "epoch": 0.7255528059391714, "grad_norm": 0.25915421691603335, "learning_rate": 3.6971898488738055e-06, "loss": 0.1256, "step": 9089 }, { "epoch": 0.7256326335116149, "grad_norm": 0.2811503591352498, "learning_rate": 3.695182793393365e-06, "loss": 0.1812, "step": 9090 }, { "epoch": 0.7257124610840584, "grad_norm": 0.27435704391169846, "learning_rate": 3.6931761593649997e-06, "loss": 0.1188, "step": 9091 }, { "epoch": 0.7257922886565019, "grad_norm": 0.26030763577635135, "learning_rate": 3.6911699469228534e-06, "loss": 0.153, "step": 9092 }, { "epoch": 0.7258721162289454, "grad_norm": 0.28003747650482724, "learning_rate": 3.6891641562010295e-06, "loss": 0.1907, "step": 9093 }, { "epoch": 0.725951943801389, "grad_norm": 0.24450809858466832, "learning_rate": 3.6871587873336066e-06, "loss": 0.1203, "step": 9094 }, { "epoch": 0.7260317713738326, "grad_norm": 0.28850492460127536, "learning_rate": 3.685153840454637e-06, "loss": 0.1626, "step": 9095 }, { "epoch": 0.7261115989462761, "grad_norm": 0.2822985197047793, "learning_rate": 3.683149315698139e-06, "loss": 0.1368, "step": 9096 }, { "epoch": 0.7261914265187196, "grad_norm": 0.28125486594417926, "learning_rate": 3.681145213198114e-06, "loss": 0.1272, "step": 9097 }, { "epoch": 0.7262712540911631, "grad_norm": 0.29471282288835843, "learning_rate": 3.679141533088525e-06, "loss": 0.1367, "step": 9098 }, { "epoch": 0.7263510816636066, "grad_norm": 0.2675817389635543, "learning_rate": 3.6771382755033103e-06, "loss": 0.2092, "step": 9099 }, { "epoch": 0.7264309092360501, "grad_norm": 0.26142845086036315, "learning_rate": 3.6751354405763797e-06, "loss": 0.1773, "step": 9100 }, { "epoch": 0.7265107368084937, "grad_norm": 0.2541855834615546, "learning_rate": 3.673133028441611e-06, "loss": 0.1357, "step": 9101 }, { "epoch": 0.7265905643809372, "grad_norm": 0.3156229463241304, "learning_rate": 3.671131039232866e-06, "loss": 0.1892, "step": 9102 }, { "epoch": 0.7266703919533807, "grad_norm": 0.3232899191282141, "learning_rate": 3.6691294730839632e-06, "loss": 0.1536, "step": 9103 }, { "epoch": 0.7267502195258242, "grad_norm": 0.276962611882431, "learning_rate": 3.667128330128703e-06, "loss": 0.1857, "step": 9104 }, { "epoch": 0.7268300470982677, "grad_norm": 0.2798498369093339, "learning_rate": 3.665127610500847e-06, "loss": 0.114, "step": 9105 }, { "epoch": 0.7269098746707112, "grad_norm": 0.296081701214449, "learning_rate": 3.663127314334145e-06, "loss": 0.1733, "step": 9106 }, { "epoch": 0.7269897022431547, "grad_norm": 0.31390847640639125, "learning_rate": 3.6611274417623046e-06, "loss": 0.1906, "step": 9107 }, { "epoch": 0.7270695298155984, "grad_norm": 0.2943586304183241, "learning_rate": 3.659127992919008e-06, "loss": 0.1757, "step": 9108 }, { "epoch": 0.7271493573880419, "grad_norm": 0.3074789977885889, "learning_rate": 3.657128967937912e-06, "loss": 0.139, "step": 9109 }, { "epoch": 0.7272291849604854, "grad_norm": 0.2870371344344093, "learning_rate": 3.6551303669526395e-06, "loss": 0.1678, "step": 9110 }, { "epoch": 0.7273090125329289, "grad_norm": 0.3440867591494328, "learning_rate": 3.6531321900967963e-06, "loss": 0.1986, "step": 9111 }, { "epoch": 0.7273888401053724, "grad_norm": 0.31198473196232934, "learning_rate": 3.6511344375039494e-06, "loss": 0.1443, "step": 9112 }, { "epoch": 0.7274686676778159, "grad_norm": 0.28544865580198897, "learning_rate": 3.6491371093076387e-06, "loss": 0.1485, "step": 9113 }, { "epoch": 0.7275484952502594, "grad_norm": 0.26084899093466246, "learning_rate": 3.6471402056413774e-06, "loss": 0.1441, "step": 9114 }, { "epoch": 0.727628322822703, "grad_norm": 0.28942219125465585, "learning_rate": 3.6451437266386557e-06, "loss": 0.1809, "step": 9115 }, { "epoch": 0.7277081503951465, "grad_norm": 0.26001906467596625, "learning_rate": 3.6431476724329296e-06, "loss": 0.2118, "step": 9116 }, { "epoch": 0.72778797796759, "grad_norm": 0.28705179782997514, "learning_rate": 3.6411520431576186e-06, "loss": 0.1413, "step": 9117 }, { "epoch": 0.7278678055400335, "grad_norm": 0.3001235161996026, "learning_rate": 3.6391568389461317e-06, "loss": 0.1393, "step": 9118 }, { "epoch": 0.727947633112477, "grad_norm": 0.29668351207939353, "learning_rate": 3.6371620599318347e-06, "loss": 0.1531, "step": 9119 }, { "epoch": 0.7280274606849205, "grad_norm": 0.31638346531587264, "learning_rate": 3.6351677062480806e-06, "loss": 0.1236, "step": 9120 }, { "epoch": 0.728107288257364, "grad_norm": 0.28366617951471773, "learning_rate": 3.6331737780281695e-06, "loss": 0.1488, "step": 9121 }, { "epoch": 0.7281871158298077, "grad_norm": 0.33645960505543065, "learning_rate": 3.6311802754053995e-06, "loss": 0.1596, "step": 9122 }, { "epoch": 0.7282669434022512, "grad_norm": 0.29313990755909275, "learning_rate": 3.6291871985130223e-06, "loss": 0.17, "step": 9123 }, { "epoch": 0.7283467709746947, "grad_norm": 0.2994780376744294, "learning_rate": 3.6271945474842708e-06, "loss": 0.1483, "step": 9124 }, { "epoch": 0.7284265985471382, "grad_norm": 0.30438775741100677, "learning_rate": 3.6252023224523425e-06, "loss": 0.1506, "step": 9125 }, { "epoch": 0.7285064261195817, "grad_norm": 0.27884642739083315, "learning_rate": 3.623210523550409e-06, "loss": 0.1599, "step": 9126 }, { "epoch": 0.7285862536920252, "grad_norm": 0.29342406721499004, "learning_rate": 3.621219150911619e-06, "loss": 0.1822, "step": 9127 }, { "epoch": 0.7286660812644687, "grad_norm": 0.30054213432716165, "learning_rate": 3.619228204669085e-06, "loss": 0.1472, "step": 9128 }, { "epoch": 0.7287459088369123, "grad_norm": 0.2664623000877589, "learning_rate": 3.6172376849558942e-06, "loss": 0.1375, "step": 9129 }, { "epoch": 0.7288257364093558, "grad_norm": 0.25862055769406656, "learning_rate": 3.6152475919051056e-06, "loss": 0.1436, "step": 9130 }, { "epoch": 0.7289055639817993, "grad_norm": 0.26084359609035557, "learning_rate": 3.6132579256497437e-06, "loss": 0.1414, "step": 9131 }, { "epoch": 0.7289853915542428, "grad_norm": 0.28450972633693655, "learning_rate": 3.6112686863228184e-06, "loss": 0.1695, "step": 9132 }, { "epoch": 0.7290652191266863, "grad_norm": 0.2880129208631373, "learning_rate": 3.6092798740572988e-06, "loss": 0.1771, "step": 9133 }, { "epoch": 0.7291450466991298, "grad_norm": 0.28259006675211973, "learning_rate": 3.6072914889861287e-06, "loss": 0.1152, "step": 9134 }, { "epoch": 0.7292248742715735, "grad_norm": 0.30477968246283504, "learning_rate": 3.6053035312422203e-06, "loss": 0.181, "step": 9135 }, { "epoch": 0.729304701844017, "grad_norm": 0.2900808075611701, "learning_rate": 3.6033160009584676e-06, "loss": 0.1454, "step": 9136 }, { "epoch": 0.7293845294164605, "grad_norm": 0.27579278367330895, "learning_rate": 3.6013288982677265e-06, "loss": 0.1777, "step": 9137 }, { "epoch": 0.729464356988904, "grad_norm": 0.2612369929977618, "learning_rate": 3.5993422233028265e-06, "loss": 0.149, "step": 9138 }, { "epoch": 0.7295441845613475, "grad_norm": 0.2936314713636058, "learning_rate": 3.597355976196568e-06, "loss": 0.1659, "step": 9139 }, { "epoch": 0.729624012133791, "grad_norm": 0.28518014612322834, "learning_rate": 3.5953701570817213e-06, "loss": 0.1611, "step": 9140 }, { "epoch": 0.7297038397062345, "grad_norm": 0.27374969586535747, "learning_rate": 3.593384766091037e-06, "loss": 0.2196, "step": 9141 }, { "epoch": 0.729783667278678, "grad_norm": 0.27887418526970603, "learning_rate": 3.5913998033572284e-06, "loss": 0.1956, "step": 9142 }, { "epoch": 0.7298634948511216, "grad_norm": 0.27813260078767005, "learning_rate": 3.58941526901298e-06, "loss": 0.1452, "step": 9143 }, { "epoch": 0.7299433224235651, "grad_norm": 0.2626863547445564, "learning_rate": 3.5874311631909486e-06, "loss": 0.1634, "step": 9144 }, { "epoch": 0.7300231499960086, "grad_norm": 0.2849311345937213, "learning_rate": 3.585447486023771e-06, "loss": 0.1335, "step": 9145 }, { "epoch": 0.7301029775684521, "grad_norm": 0.3201180119878865, "learning_rate": 3.583464237644043e-06, "loss": 0.1756, "step": 9146 }, { "epoch": 0.7301828051408956, "grad_norm": 0.3113785540211057, "learning_rate": 3.5814814181843373e-06, "loss": 0.1667, "step": 9147 }, { "epoch": 0.7302626327133391, "grad_norm": 0.2864034261457439, "learning_rate": 3.579499027777199e-06, "loss": 0.1253, "step": 9148 }, { "epoch": 0.7303424602857828, "grad_norm": 0.2929829955664989, "learning_rate": 3.5775170665551385e-06, "loss": 0.1407, "step": 9149 }, { "epoch": 0.7304222878582263, "grad_norm": 0.3952298616032284, "learning_rate": 3.575535534650648e-06, "loss": 0.1628, "step": 9150 }, { "epoch": 0.7305021154306698, "grad_norm": 0.35597753721815845, "learning_rate": 3.5735544321961837e-06, "loss": 0.1714, "step": 9151 }, { "epoch": 0.7305819430031133, "grad_norm": 0.3110185438394201, "learning_rate": 3.571573759324174e-06, "loss": 0.1518, "step": 9152 }, { "epoch": 0.7306617705755568, "grad_norm": 0.2699366839495346, "learning_rate": 3.569593516167017e-06, "loss": 0.1688, "step": 9153 }, { "epoch": 0.7307415981480003, "grad_norm": 0.2789836204464081, "learning_rate": 3.5676137028570833e-06, "loss": 0.1751, "step": 9154 }, { "epoch": 0.7308214257204438, "grad_norm": 0.3258383935476253, "learning_rate": 3.565634319526724e-06, "loss": 0.1515, "step": 9155 }, { "epoch": 0.7309012532928874, "grad_norm": 0.29618209211823293, "learning_rate": 3.5636553663082407e-06, "loss": 0.162, "step": 9156 }, { "epoch": 0.7309810808653309, "grad_norm": 0.2928975849193491, "learning_rate": 3.5616768433339276e-06, "loss": 0.1791, "step": 9157 }, { "epoch": 0.7310609084377744, "grad_norm": 0.2683852985758656, "learning_rate": 3.559698750736036e-06, "loss": 0.1293, "step": 9158 }, { "epoch": 0.7311407360102179, "grad_norm": 0.32030162634383064, "learning_rate": 3.5577210886467983e-06, "loss": 0.1564, "step": 9159 }, { "epoch": 0.7312205635826614, "grad_norm": 0.3133496757548459, "learning_rate": 3.5557438571984148e-06, "loss": 0.1553, "step": 9160 }, { "epoch": 0.7313003911551049, "grad_norm": 0.28283006190017, "learning_rate": 3.5537670565230442e-06, "loss": 0.1635, "step": 9161 }, { "epoch": 0.7313802187275485, "grad_norm": 0.2813555849593872, "learning_rate": 3.5517906867528394e-06, "loss": 0.1116, "step": 9162 }, { "epoch": 0.7314600462999921, "grad_norm": 0.3034831650818207, "learning_rate": 3.549814748019906e-06, "loss": 0.1961, "step": 9163 }, { "epoch": 0.7315398738724356, "grad_norm": 0.2936041198110592, "learning_rate": 3.547839240456338e-06, "loss": 0.2069, "step": 9164 }, { "epoch": 0.7316197014448791, "grad_norm": 0.31594153705926353, "learning_rate": 3.545864164194175e-06, "loss": 0.1278, "step": 9165 }, { "epoch": 0.7316995290173226, "grad_norm": 0.241681148496457, "learning_rate": 3.5438895193654543e-06, "loss": 0.1223, "step": 9166 }, { "epoch": 0.7317793565897661, "grad_norm": 0.29205250721924586, "learning_rate": 3.541915306102166e-06, "loss": 0.1649, "step": 9167 }, { "epoch": 0.7318591841622096, "grad_norm": 0.3277693628689575, "learning_rate": 3.539941524536289e-06, "loss": 0.1521, "step": 9168 }, { "epoch": 0.7319390117346531, "grad_norm": 0.25011583607866444, "learning_rate": 3.537968174799753e-06, "loss": 0.1157, "step": 9169 }, { "epoch": 0.7320188393070967, "grad_norm": 0.2890531414920454, "learning_rate": 3.5359952570244684e-06, "loss": 0.1275, "step": 9170 }, { "epoch": 0.7320986668795402, "grad_norm": 0.3094020378834478, "learning_rate": 3.5340227713423225e-06, "loss": 0.1626, "step": 9171 }, { "epoch": 0.7321784944519837, "grad_norm": 0.2751491523577969, "learning_rate": 3.532050717885166e-06, "loss": 0.1495, "step": 9172 }, { "epoch": 0.7322583220244272, "grad_norm": 0.2907104340716829, "learning_rate": 3.530079096784823e-06, "loss": 0.1489, "step": 9173 }, { "epoch": 0.7323381495968707, "grad_norm": 0.2707641935457119, "learning_rate": 3.5281079081730853e-06, "loss": 0.1696, "step": 9174 }, { "epoch": 0.7324179771693142, "grad_norm": 0.35414998741091064, "learning_rate": 3.5261371521817247e-06, "loss": 0.158, "step": 9175 }, { "epoch": 0.7324978047417579, "grad_norm": 0.285650850830949, "learning_rate": 3.5241668289424745e-06, "loss": 0.1265, "step": 9176 }, { "epoch": 0.7325776323142014, "grad_norm": 0.2880692972659465, "learning_rate": 3.522196938587046e-06, "loss": 0.1314, "step": 9177 }, { "epoch": 0.7326574598866449, "grad_norm": 0.27560971129924117, "learning_rate": 3.520227481247116e-06, "loss": 0.189, "step": 9178 }, { "epoch": 0.7327372874590884, "grad_norm": 0.3372322531278531, "learning_rate": 3.5182584570543323e-06, "loss": 0.1534, "step": 9179 }, { "epoch": 0.7328171150315319, "grad_norm": 0.30672022524371517, "learning_rate": 3.5162898661403233e-06, "loss": 0.1727, "step": 9180 }, { "epoch": 0.7328969426039754, "grad_norm": 0.27224783718363516, "learning_rate": 3.5143217086366778e-06, "loss": 0.1715, "step": 9181 }, { "epoch": 0.7329767701764189, "grad_norm": 0.3015606786113519, "learning_rate": 3.5123539846749586e-06, "loss": 0.1447, "step": 9182 }, { "epoch": 0.7330565977488624, "grad_norm": 0.32238288185165237, "learning_rate": 3.510386694386703e-06, "loss": 0.207, "step": 9183 }, { "epoch": 0.733136425321306, "grad_norm": 0.3042803421728782, "learning_rate": 3.5084198379034098e-06, "loss": 0.1201, "step": 9184 }, { "epoch": 0.7332162528937495, "grad_norm": 0.2699028945600328, "learning_rate": 3.5064534153565644e-06, "loss": 0.1938, "step": 9185 }, { "epoch": 0.733296080466193, "grad_norm": 0.28585428566726484, "learning_rate": 3.504487426877611e-06, "loss": 0.1355, "step": 9186 }, { "epoch": 0.7333759080386365, "grad_norm": 0.30466466449137225, "learning_rate": 3.502521872597967e-06, "loss": 0.1692, "step": 9187 }, { "epoch": 0.73345573561108, "grad_norm": 0.26167010117811046, "learning_rate": 3.50055675264902e-06, "loss": 0.1736, "step": 9188 }, { "epoch": 0.7335355631835236, "grad_norm": 0.34685619394369555, "learning_rate": 3.498592067162135e-06, "loss": 0.1633, "step": 9189 }, { "epoch": 0.7336153907559672, "grad_norm": 0.2875538667758374, "learning_rate": 3.496627816268643e-06, "loss": 0.156, "step": 9190 }, { "epoch": 0.7336952183284107, "grad_norm": 0.3249264441365723, "learning_rate": 3.4946640000998445e-06, "loss": 0.1744, "step": 9191 }, { "epoch": 0.7337750459008542, "grad_norm": 0.2529284659230842, "learning_rate": 3.4927006187870126e-06, "loss": 0.1736, "step": 9192 }, { "epoch": 0.7338548734732977, "grad_norm": 0.29150624559527355, "learning_rate": 3.4907376724613896e-06, "loss": 0.1837, "step": 9193 }, { "epoch": 0.7339347010457412, "grad_norm": 0.2844209321623895, "learning_rate": 3.4887751612541967e-06, "loss": 0.1626, "step": 9194 }, { "epoch": 0.7340145286181847, "grad_norm": 0.3149533230089806, "learning_rate": 3.4868130852966154e-06, "loss": 0.142, "step": 9195 }, { "epoch": 0.7340943561906282, "grad_norm": 0.2979006983768973, "learning_rate": 3.4848514447198047e-06, "loss": 0.1986, "step": 9196 }, { "epoch": 0.7341741837630718, "grad_norm": 0.2848667332104085, "learning_rate": 3.4828902396548916e-06, "loss": 0.1743, "step": 9197 }, { "epoch": 0.7342540113355153, "grad_norm": 0.28705324221812545, "learning_rate": 3.480929470232972e-06, "loss": 0.1359, "step": 9198 }, { "epoch": 0.7343338389079588, "grad_norm": 0.2729082147591892, "learning_rate": 3.478969136585125e-06, "loss": 0.1504, "step": 9199 }, { "epoch": 0.7344136664804023, "grad_norm": 0.2961599501458581, "learning_rate": 3.4770092388423783e-06, "loss": 0.1826, "step": 9200 }, { "epoch": 0.7344934940528458, "grad_norm": 0.2951771023627549, "learning_rate": 3.475049777135753e-06, "loss": 0.1732, "step": 9201 }, { "epoch": 0.7345733216252893, "grad_norm": 0.34512407642611287, "learning_rate": 3.4730907515962242e-06, "loss": 0.1458, "step": 9202 }, { "epoch": 0.734653149197733, "grad_norm": 0.31804569914522407, "learning_rate": 3.4711321623547566e-06, "loss": 0.1435, "step": 9203 }, { "epoch": 0.7347329767701765, "grad_norm": 0.31244746218394687, "learning_rate": 3.469174009542259e-06, "loss": 0.1977, "step": 9204 }, { "epoch": 0.73481280434262, "grad_norm": 0.31410630257931216, "learning_rate": 3.467216293289637e-06, "loss": 0.2273, "step": 9205 }, { "epoch": 0.7348926319150635, "grad_norm": 0.31158780737166514, "learning_rate": 3.465259013727753e-06, "loss": 0.1895, "step": 9206 }, { "epoch": 0.734972459487507, "grad_norm": 0.2811952655894481, "learning_rate": 3.4633021709874405e-06, "loss": 0.1602, "step": 9207 }, { "epoch": 0.7350522870599505, "grad_norm": 0.24310221366302578, "learning_rate": 3.4613457651995153e-06, "loss": 0.1973, "step": 9208 }, { "epoch": 0.735132114632394, "grad_norm": 0.2779395108707327, "learning_rate": 3.459389796494743e-06, "loss": 0.169, "step": 9209 }, { "epoch": 0.7352119422048375, "grad_norm": 0.2791656249535736, "learning_rate": 3.4574342650038826e-06, "loss": 0.165, "step": 9210 }, { "epoch": 0.7352917697772811, "grad_norm": 0.28128276058801693, "learning_rate": 3.455479170857646e-06, "loss": 0.1522, "step": 9211 }, { "epoch": 0.7353715973497246, "grad_norm": 0.2951291701126983, "learning_rate": 3.4535245141867346e-06, "loss": 0.206, "step": 9212 }, { "epoch": 0.7354514249221681, "grad_norm": 0.24105044209922433, "learning_rate": 3.451570295121798e-06, "loss": 0.1386, "step": 9213 }, { "epoch": 0.7355312524946116, "grad_norm": 0.2615096529858847, "learning_rate": 3.4496165137934688e-06, "loss": 0.1482, "step": 9214 }, { "epoch": 0.7356110800670551, "grad_norm": 0.3101703266382361, "learning_rate": 3.447663170332358e-06, "loss": 0.1372, "step": 9215 }, { "epoch": 0.7356909076394987, "grad_norm": 0.2857807130874664, "learning_rate": 3.445710264869032e-06, "loss": 0.1788, "step": 9216 }, { "epoch": 0.7357707352119423, "grad_norm": 0.26820646604297566, "learning_rate": 3.4437577975340387e-06, "loss": 0.1516, "step": 9217 }, { "epoch": 0.7358505627843858, "grad_norm": 0.2782675989968943, "learning_rate": 3.4418057684578865e-06, "loss": 0.1642, "step": 9218 }, { "epoch": 0.7359303903568293, "grad_norm": 0.32487230696817876, "learning_rate": 3.439854177771068e-06, "loss": 0.1899, "step": 9219 }, { "epoch": 0.7360102179292728, "grad_norm": 0.28924618725394824, "learning_rate": 3.437903025604037e-06, "loss": 0.1619, "step": 9220 }, { "epoch": 0.7360900455017163, "grad_norm": 0.28160215199490574, "learning_rate": 3.4359523120872194e-06, "loss": 0.1979, "step": 9221 }, { "epoch": 0.7361698730741598, "grad_norm": 0.2774582885100359, "learning_rate": 3.434002037351013e-06, "loss": 0.1168, "step": 9222 }, { "epoch": 0.7362497006466033, "grad_norm": 0.29353816513673714, "learning_rate": 3.432052201525783e-06, "loss": 0.1604, "step": 9223 }, { "epoch": 0.7363295282190468, "grad_norm": 0.2850092883231127, "learning_rate": 3.430102804741874e-06, "loss": 0.1839, "step": 9224 }, { "epoch": 0.7364093557914904, "grad_norm": 0.3087563622932915, "learning_rate": 3.4281538471295927e-06, "loss": 0.1357, "step": 9225 }, { "epoch": 0.7364891833639339, "grad_norm": 0.33912614220460663, "learning_rate": 3.42620532881922e-06, "loss": 0.148, "step": 9226 }, { "epoch": 0.7365690109363774, "grad_norm": 0.2719081187357932, "learning_rate": 3.4242572499410055e-06, "loss": 0.138, "step": 9227 }, { "epoch": 0.7366488385088209, "grad_norm": 0.2675200617355949, "learning_rate": 3.4223096106251672e-06, "loss": 0.1238, "step": 9228 }, { "epoch": 0.7367286660812644, "grad_norm": 0.30751889803012994, "learning_rate": 3.420362411001904e-06, "loss": 0.1707, "step": 9229 }, { "epoch": 0.736808493653708, "grad_norm": 0.3028673283055286, "learning_rate": 3.4184156512013756e-06, "loss": 0.1844, "step": 9230 }, { "epoch": 0.7368883212261516, "grad_norm": 0.2519679275778063, "learning_rate": 3.4164693313537147e-06, "loss": 0.1745, "step": 9231 }, { "epoch": 0.7369681487985951, "grad_norm": 0.2868477308535638, "learning_rate": 3.4145234515890213e-06, "loss": 0.1577, "step": 9232 }, { "epoch": 0.7370479763710386, "grad_norm": 0.25142969994128833, "learning_rate": 3.4125780120373775e-06, "loss": 0.1538, "step": 9233 }, { "epoch": 0.7371278039434821, "grad_norm": 0.31271259204343965, "learning_rate": 3.4106330128288246e-06, "loss": 0.2017, "step": 9234 }, { "epoch": 0.7372076315159256, "grad_norm": 0.2695632893028304, "learning_rate": 3.4086884540933772e-06, "loss": 0.1484, "step": 9235 }, { "epoch": 0.7372874590883691, "grad_norm": 0.24415308780173225, "learning_rate": 3.406744335961022e-06, "loss": 0.1482, "step": 9236 }, { "epoch": 0.7373672866608126, "grad_norm": 0.328747494661866, "learning_rate": 3.4048006585617133e-06, "loss": 0.135, "step": 9237 }, { "epoch": 0.7374471142332562, "grad_norm": 0.2868394053136111, "learning_rate": 3.402857422025383e-06, "loss": 0.1271, "step": 9238 }, { "epoch": 0.7375269418056997, "grad_norm": 0.3036734308456617, "learning_rate": 3.4009146264819273e-06, "loss": 0.1484, "step": 9239 }, { "epoch": 0.7376067693781432, "grad_norm": 0.3658820967649234, "learning_rate": 3.3989722720612127e-06, "loss": 0.1981, "step": 9240 }, { "epoch": 0.7376865969505867, "grad_norm": 0.2712724493316848, "learning_rate": 3.397030358893075e-06, "loss": 0.1597, "step": 9241 }, { "epoch": 0.7377664245230302, "grad_norm": 0.29187234580664007, "learning_rate": 3.3950888871073304e-06, "loss": 0.149, "step": 9242 }, { "epoch": 0.7378462520954738, "grad_norm": 0.29528646318408464, "learning_rate": 3.3931478568337596e-06, "loss": 0.1765, "step": 9243 }, { "epoch": 0.7379260796679173, "grad_norm": 0.26873759492963983, "learning_rate": 3.391207268202101e-06, "loss": 0.1699, "step": 9244 }, { "epoch": 0.7380059072403609, "grad_norm": 0.30706390331741723, "learning_rate": 3.389267121342087e-06, "loss": 0.1805, "step": 9245 }, { "epoch": 0.7380857348128044, "grad_norm": 0.27233702460111636, "learning_rate": 3.3873274163834002e-06, "loss": 0.1423, "step": 9246 }, { "epoch": 0.7381655623852479, "grad_norm": 0.3070451421119744, "learning_rate": 3.385388153455713e-06, "loss": 0.1816, "step": 9247 }, { "epoch": 0.7382453899576914, "grad_norm": 0.27331791244986803, "learning_rate": 3.3834493326886453e-06, "loss": 0.182, "step": 9248 }, { "epoch": 0.7383252175301349, "grad_norm": 0.27755945576249624, "learning_rate": 3.3815109542118072e-06, "loss": 0.2303, "step": 9249 }, { "epoch": 0.7384050451025784, "grad_norm": 0.35130493283798503, "learning_rate": 3.3795730181547715e-06, "loss": 0.1527, "step": 9250 }, { "epoch": 0.7384848726750219, "grad_norm": 0.29191624080899525, "learning_rate": 3.377635524647075e-06, "loss": 0.1565, "step": 9251 }, { "epoch": 0.7385647002474655, "grad_norm": 0.27502436663800495, "learning_rate": 3.375698473818243e-06, "loss": 0.1579, "step": 9252 }, { "epoch": 0.738644527819909, "grad_norm": 0.3128922105867002, "learning_rate": 3.3737618657977465e-06, "loss": 0.1371, "step": 9253 }, { "epoch": 0.7387243553923525, "grad_norm": 0.27223865858839225, "learning_rate": 3.371825700715049e-06, "loss": 0.1637, "step": 9254 }, { "epoch": 0.738804182964796, "grad_norm": 0.25179585873137217, "learning_rate": 3.369889978699571e-06, "loss": 0.1425, "step": 9255 }, { "epoch": 0.7388840105372395, "grad_norm": 0.2877116296982481, "learning_rate": 3.3679546998807155e-06, "loss": 0.1676, "step": 9256 }, { "epoch": 0.7389638381096831, "grad_norm": 0.2933444422954214, "learning_rate": 3.3660198643878395e-06, "loss": 0.196, "step": 9257 }, { "epoch": 0.7390436656821266, "grad_norm": 0.3312782202899221, "learning_rate": 3.364085472350278e-06, "loss": 0.1117, "step": 9258 }, { "epoch": 0.7391234932545702, "grad_norm": 0.3130218495326193, "learning_rate": 3.3621515238973456e-06, "loss": 0.1912, "step": 9259 }, { "epoch": 0.7392033208270137, "grad_norm": 0.3040563440944265, "learning_rate": 3.3602180191583154e-06, "loss": 0.1245, "step": 9260 }, { "epoch": 0.7392831483994572, "grad_norm": 0.36209649034325037, "learning_rate": 3.3582849582624332e-06, "loss": 0.2102, "step": 9261 }, { "epoch": 0.7393629759719007, "grad_norm": 0.3046683997451228, "learning_rate": 3.3563523413389152e-06, "loss": 0.1719, "step": 9262 }, { "epoch": 0.7394428035443442, "grad_norm": 0.2676269424750344, "learning_rate": 3.3544201685169543e-06, "loss": 0.1419, "step": 9263 }, { "epoch": 0.7395226311167877, "grad_norm": 0.28567210138267457, "learning_rate": 3.352488439925705e-06, "loss": 0.1998, "step": 9264 }, { "epoch": 0.7396024586892312, "grad_norm": 0.2813275364941617, "learning_rate": 3.350557155694297e-06, "loss": 0.1418, "step": 9265 }, { "epoch": 0.7396822862616748, "grad_norm": 0.3216676057791766, "learning_rate": 3.3486263159518294e-06, "loss": 0.1481, "step": 9266 }, { "epoch": 0.7397621138341183, "grad_norm": 0.28115123086859956, "learning_rate": 3.3466959208273662e-06, "loss": 0.1692, "step": 9267 }, { "epoch": 0.7398419414065618, "grad_norm": 0.34773488184680934, "learning_rate": 3.3447659704499546e-06, "loss": 0.1812, "step": 9268 }, { "epoch": 0.7399217689790053, "grad_norm": 0.29471469026607117, "learning_rate": 3.3428364649486e-06, "loss": 0.1632, "step": 9269 }, { "epoch": 0.7400015965514489, "grad_norm": 0.2896709669303309, "learning_rate": 3.3409074044522815e-06, "loss": 0.1628, "step": 9270 }, { "epoch": 0.7400814241238924, "grad_norm": 0.2445704758264057, "learning_rate": 3.3389787890899483e-06, "loss": 0.1058, "step": 9271 }, { "epoch": 0.740161251696336, "grad_norm": 0.28484505081079037, "learning_rate": 3.337050618990525e-06, "loss": 0.1608, "step": 9272 }, { "epoch": 0.7402410792687795, "grad_norm": 0.2837508927651048, "learning_rate": 3.3351228942829004e-06, "loss": 0.1882, "step": 9273 }, { "epoch": 0.740320906841223, "grad_norm": 0.3233512953749474, "learning_rate": 3.3331956150959345e-06, "loss": 0.1606, "step": 9274 }, { "epoch": 0.7404007344136665, "grad_norm": 0.3907930904540319, "learning_rate": 3.3312687815584576e-06, "loss": 0.1798, "step": 9275 }, { "epoch": 0.74048056198611, "grad_norm": 0.3348808809130757, "learning_rate": 3.3293423937992687e-06, "loss": 0.1658, "step": 9276 }, { "epoch": 0.7405603895585535, "grad_norm": 0.3068531043838832, "learning_rate": 3.3274164519471465e-06, "loss": 0.1068, "step": 9277 }, { "epoch": 0.740640217130997, "grad_norm": 0.3046221216270799, "learning_rate": 3.3254909561308268e-06, "loss": 0.1398, "step": 9278 }, { "epoch": 0.7407200447034405, "grad_norm": 0.3554841563714897, "learning_rate": 3.323565906479024e-06, "loss": 0.16, "step": 9279 }, { "epoch": 0.7407998722758841, "grad_norm": 0.29787129315430627, "learning_rate": 3.3216413031204177e-06, "loss": 0.2028, "step": 9280 }, { "epoch": 0.7408796998483276, "grad_norm": 0.29161916997317255, "learning_rate": 3.3197171461836587e-06, "loss": 0.2125, "step": 9281 }, { "epoch": 0.7409595274207711, "grad_norm": 0.28915906071028524, "learning_rate": 3.317793435797375e-06, "loss": 0.179, "step": 9282 }, { "epoch": 0.7410393549932146, "grad_norm": 0.29323965608921126, "learning_rate": 3.3158701720901553e-06, "loss": 0.1842, "step": 9283 }, { "epoch": 0.7411191825656582, "grad_norm": 0.27072385866650245, "learning_rate": 3.313947355190563e-06, "loss": 0.1434, "step": 9284 }, { "epoch": 0.7411990101381017, "grad_norm": 0.3079700934607975, "learning_rate": 3.312024985227127e-06, "loss": 0.1719, "step": 9285 }, { "epoch": 0.7412788377105453, "grad_norm": 0.31662623898232334, "learning_rate": 3.310103062328358e-06, "loss": 0.1694, "step": 9286 }, { "epoch": 0.7413586652829888, "grad_norm": 0.2952306167035285, "learning_rate": 3.308181586622726e-06, "loss": 0.1382, "step": 9287 }, { "epoch": 0.7414384928554323, "grad_norm": 0.2862746345015024, "learning_rate": 3.3062605582386675e-06, "loss": 0.1741, "step": 9288 }, { "epoch": 0.7415183204278758, "grad_norm": 0.2909368006244651, "learning_rate": 3.3043399773046036e-06, "loss": 0.1304, "step": 9289 }, { "epoch": 0.7415981480003193, "grad_norm": 0.299348794426679, "learning_rate": 3.3024198439489107e-06, "loss": 0.1499, "step": 9290 }, { "epoch": 0.7416779755727628, "grad_norm": 0.25961745964350885, "learning_rate": 3.300500158299954e-06, "loss": 0.1311, "step": 9291 }, { "epoch": 0.7417578031452063, "grad_norm": 0.3389659803254087, "learning_rate": 3.298580920486041e-06, "loss": 0.1449, "step": 9292 }, { "epoch": 0.7418376307176499, "grad_norm": 0.2863115090521182, "learning_rate": 3.2966621306354773e-06, "loss": 0.1568, "step": 9293 }, { "epoch": 0.7419174582900934, "grad_norm": 0.3061553350372271, "learning_rate": 3.294743788876522e-06, "loss": 0.1666, "step": 9294 }, { "epoch": 0.7419972858625369, "grad_norm": 0.27616997279264743, "learning_rate": 3.292825895337408e-06, "loss": 0.1445, "step": 9295 }, { "epoch": 0.7420771134349804, "grad_norm": 0.32125123961313795, "learning_rate": 3.2909084501463407e-06, "loss": 0.1543, "step": 9296 }, { "epoch": 0.7421569410074239, "grad_norm": 0.25771829865376245, "learning_rate": 3.2889914534314904e-06, "loss": 0.1392, "step": 9297 }, { "epoch": 0.7422367685798675, "grad_norm": 0.30598549788762003, "learning_rate": 3.287074905321005e-06, "loss": 0.24, "step": 9298 }, { "epoch": 0.742316596152311, "grad_norm": 0.24592537616814866, "learning_rate": 3.2851588059429928e-06, "loss": 0.1612, "step": 9299 }, { "epoch": 0.7423964237247546, "grad_norm": 0.2967675985983142, "learning_rate": 3.283243155425548e-06, "loss": 0.1602, "step": 9300 }, { "epoch": 0.7424762512971981, "grad_norm": 0.29901729090024476, "learning_rate": 3.2813279538967113e-06, "loss": 0.1666, "step": 9301 }, { "epoch": 0.7425560788696416, "grad_norm": 0.30541490988754605, "learning_rate": 3.279413201484515e-06, "loss": 0.203, "step": 9302 }, { "epoch": 0.7426359064420851, "grad_norm": 0.28147223332919613, "learning_rate": 3.277498898316951e-06, "loss": 0.1868, "step": 9303 }, { "epoch": 0.7427157340145286, "grad_norm": 0.26454423854200027, "learning_rate": 3.2755850445219807e-06, "loss": 0.1629, "step": 9304 }, { "epoch": 0.7427955615869721, "grad_norm": 0.30303079193171584, "learning_rate": 3.273671640227538e-06, "loss": 0.1916, "step": 9305 }, { "epoch": 0.7428753891594156, "grad_norm": 0.27063809901264957, "learning_rate": 3.2717586855615258e-06, "loss": 0.1188, "step": 9306 }, { "epoch": 0.7429552167318592, "grad_norm": 0.24824521418136308, "learning_rate": 3.2698461806518213e-06, "loss": 0.1739, "step": 9307 }, { "epoch": 0.7430350443043027, "grad_norm": 0.28626115389424367, "learning_rate": 3.2679341256262653e-06, "loss": 0.144, "step": 9308 }, { "epoch": 0.7431148718767462, "grad_norm": 0.2902509353435157, "learning_rate": 3.2660225206126717e-06, "loss": 0.2151, "step": 9309 }, { "epoch": 0.7431946994491897, "grad_norm": 0.25122440968137266, "learning_rate": 3.2641113657388235e-06, "loss": 0.1828, "step": 9310 }, { "epoch": 0.7432745270216333, "grad_norm": 0.37441026131448607, "learning_rate": 3.262200661132471e-06, "loss": 0.1832, "step": 9311 }, { "epoch": 0.7433543545940768, "grad_norm": 0.2780016294698959, "learning_rate": 3.2602904069213424e-06, "loss": 0.1243, "step": 9312 }, { "epoch": 0.7434341821665204, "grad_norm": 0.2729839473547319, "learning_rate": 3.258380603233129e-06, "loss": 0.1948, "step": 9313 }, { "epoch": 0.7435140097389639, "grad_norm": 0.26984200627957705, "learning_rate": 3.256471250195492e-06, "loss": 0.1489, "step": 9314 }, { "epoch": 0.7435938373114074, "grad_norm": 0.31030509561275865, "learning_rate": 3.254562347936062e-06, "loss": 0.1676, "step": 9315 }, { "epoch": 0.7436736648838509, "grad_norm": 0.24517507109277525, "learning_rate": 3.2526538965824485e-06, "loss": 0.1515, "step": 9316 }, { "epoch": 0.7437534924562944, "grad_norm": 0.2739159124745501, "learning_rate": 3.2507458962622186e-06, "loss": 0.145, "step": 9317 }, { "epoch": 0.7438333200287379, "grad_norm": 0.37757635330005934, "learning_rate": 3.248838347102916e-06, "loss": 0.1605, "step": 9318 }, { "epoch": 0.7439131476011814, "grad_norm": 0.2684745675751104, "learning_rate": 3.2469312492320535e-06, "loss": 0.1999, "step": 9319 }, { "epoch": 0.743992975173625, "grad_norm": 0.28299637400987415, "learning_rate": 3.2450246027771095e-06, "loss": 0.1513, "step": 9320 }, { "epoch": 0.7440728027460685, "grad_norm": 0.27060539417529783, "learning_rate": 3.243118407865541e-06, "loss": 0.1956, "step": 9321 }, { "epoch": 0.744152630318512, "grad_norm": 0.27930061893771035, "learning_rate": 3.2412126646247666e-06, "loss": 0.1795, "step": 9322 }, { "epoch": 0.7442324578909555, "grad_norm": 0.26640893254114684, "learning_rate": 3.239307373182179e-06, "loss": 0.1286, "step": 9323 }, { "epoch": 0.744312285463399, "grad_norm": 0.2924037386772849, "learning_rate": 3.2374025336651384e-06, "loss": 0.1537, "step": 9324 }, { "epoch": 0.7443921130358426, "grad_norm": 0.2748038829967667, "learning_rate": 3.2354981462009717e-06, "loss": 0.1839, "step": 9325 }, { "epoch": 0.7444719406082861, "grad_norm": 0.27689127918151735, "learning_rate": 3.2335942109169884e-06, "loss": 0.1553, "step": 9326 }, { "epoch": 0.7445517681807297, "grad_norm": 0.3230863329943614, "learning_rate": 3.231690727940454e-06, "loss": 0.1835, "step": 9327 }, { "epoch": 0.7446315957531732, "grad_norm": 0.29578131996954155, "learning_rate": 3.2297876973986096e-06, "loss": 0.175, "step": 9328 }, { "epoch": 0.7447114233256167, "grad_norm": 0.2601262976915519, "learning_rate": 3.227885119418662e-06, "loss": 0.1788, "step": 9329 }, { "epoch": 0.7447912508980602, "grad_norm": 0.24950766986547301, "learning_rate": 3.2259829941277974e-06, "loss": 0.171, "step": 9330 }, { "epoch": 0.7448710784705037, "grad_norm": 0.2891316694761745, "learning_rate": 3.224081321653164e-06, "loss": 0.1649, "step": 9331 }, { "epoch": 0.7449509060429472, "grad_norm": 0.33193404920693637, "learning_rate": 3.2221801021218748e-06, "loss": 0.1804, "step": 9332 }, { "epoch": 0.7450307336153907, "grad_norm": 0.3286987757111518, "learning_rate": 3.220279335661024e-06, "loss": 0.1591, "step": 9333 }, { "epoch": 0.7451105611878343, "grad_norm": 0.27333601159841814, "learning_rate": 3.2183790223976675e-06, "loss": 0.157, "step": 9334 }, { "epoch": 0.7451903887602778, "grad_norm": 0.2819606594361355, "learning_rate": 3.216479162458842e-06, "loss": 0.1502, "step": 9335 }, { "epoch": 0.7452702163327213, "grad_norm": 0.32719890489252296, "learning_rate": 3.214579755971532e-06, "loss": 0.1656, "step": 9336 }, { "epoch": 0.7453500439051648, "grad_norm": 0.27336060975365783, "learning_rate": 3.2126808030627165e-06, "loss": 0.1548, "step": 9337 }, { "epoch": 0.7454298714776084, "grad_norm": 0.337301175985208, "learning_rate": 3.2107823038593256e-06, "loss": 0.1543, "step": 9338 }, { "epoch": 0.7455096990500519, "grad_norm": 0.306929532303817, "learning_rate": 3.208884258488276e-06, "loss": 0.1659, "step": 9339 }, { "epoch": 0.7455895266224954, "grad_norm": 0.2775334119473768, "learning_rate": 3.2069866670764362e-06, "loss": 0.1739, "step": 9340 }, { "epoch": 0.745669354194939, "grad_norm": 0.300349666769899, "learning_rate": 3.2050895297506512e-06, "loss": 0.1471, "step": 9341 }, { "epoch": 0.7457491817673825, "grad_norm": 0.24491253385076114, "learning_rate": 3.2031928466377436e-06, "loss": 0.1526, "step": 9342 }, { "epoch": 0.745829009339826, "grad_norm": 0.30855685586178, "learning_rate": 3.2012966178644933e-06, "loss": 0.1722, "step": 9343 }, { "epoch": 0.7459088369122695, "grad_norm": 0.288752200149035, "learning_rate": 3.1994008435576653e-06, "loss": 0.1391, "step": 9344 }, { "epoch": 0.745988664484713, "grad_norm": 0.24274622161699538, "learning_rate": 3.197505523843971e-06, "loss": 0.1633, "step": 9345 }, { "epoch": 0.7460684920571565, "grad_norm": 0.360731702439121, "learning_rate": 3.195610658850117e-06, "loss": 0.1868, "step": 9346 }, { "epoch": 0.7461483196296, "grad_norm": 0.31787219564733166, "learning_rate": 3.1937162487027605e-06, "loss": 0.1394, "step": 9347 }, { "epoch": 0.7462281472020436, "grad_norm": 0.2982165432058537, "learning_rate": 3.191822293528539e-06, "loss": 0.1632, "step": 9348 }, { "epoch": 0.7463079747744871, "grad_norm": 0.27026675490283486, "learning_rate": 3.1899287934540536e-06, "loss": 0.1573, "step": 9349 }, { "epoch": 0.7463878023469306, "grad_norm": 0.29766671029880143, "learning_rate": 3.1880357486058753e-06, "loss": 0.1487, "step": 9350 }, { "epoch": 0.7464676299193741, "grad_norm": 0.2708738305471085, "learning_rate": 3.186143159110552e-06, "loss": 0.1349, "step": 9351 }, { "epoch": 0.7465474574918177, "grad_norm": 0.3027304355457705, "learning_rate": 3.1842510250945935e-06, "loss": 0.1133, "step": 9352 }, { "epoch": 0.7466272850642612, "grad_norm": 0.3119503681334708, "learning_rate": 3.1823593466844817e-06, "loss": 0.1814, "step": 9353 }, { "epoch": 0.7467071126367048, "grad_norm": 0.3206292061977817, "learning_rate": 3.180468124006667e-06, "loss": 0.1719, "step": 9354 }, { "epoch": 0.7467869402091483, "grad_norm": 0.2762491721768317, "learning_rate": 3.178577357187567e-06, "loss": 0.1355, "step": 9355 }, { "epoch": 0.7468667677815918, "grad_norm": 0.27106213705155147, "learning_rate": 3.176687046353578e-06, "loss": 0.1391, "step": 9356 }, { "epoch": 0.7469465953540353, "grad_norm": 0.2848542918596435, "learning_rate": 3.1747971916310583e-06, "loss": 0.1421, "step": 9357 }, { "epoch": 0.7470264229264788, "grad_norm": 0.29386559334193973, "learning_rate": 3.172907793146336e-06, "loss": 0.1639, "step": 9358 }, { "epoch": 0.7471062504989223, "grad_norm": 0.28725910172537, "learning_rate": 3.1710188510257054e-06, "loss": 0.1138, "step": 9359 }, { "epoch": 0.7471860780713658, "grad_norm": 0.3054371922184845, "learning_rate": 3.1691303653954443e-06, "loss": 0.124, "step": 9360 }, { "epoch": 0.7472659056438093, "grad_norm": 0.3064248210668356, "learning_rate": 3.167242336381785e-06, "loss": 0.1088, "step": 9361 }, { "epoch": 0.7473457332162529, "grad_norm": 0.28892784886543155, "learning_rate": 3.165354764110936e-06, "loss": 0.1566, "step": 9362 }, { "epoch": 0.7474255607886964, "grad_norm": 0.33039788047568025, "learning_rate": 3.163467648709073e-06, "loss": 0.169, "step": 9363 }, { "epoch": 0.7475053883611399, "grad_norm": 0.300147732286894, "learning_rate": 3.1615809903023385e-06, "loss": 0.1542, "step": 9364 }, { "epoch": 0.7475852159335835, "grad_norm": 0.30891076250839683, "learning_rate": 3.1596947890168573e-06, "loss": 0.2097, "step": 9365 }, { "epoch": 0.747665043506027, "grad_norm": 0.24565959444180122, "learning_rate": 3.157809044978708e-06, "loss": 0.2073, "step": 9366 }, { "epoch": 0.7477448710784705, "grad_norm": 0.3095970975758469, "learning_rate": 3.155923758313948e-06, "loss": 0.158, "step": 9367 }, { "epoch": 0.747824698650914, "grad_norm": 0.27235256462933766, "learning_rate": 3.1540389291485953e-06, "loss": 0.1735, "step": 9368 }, { "epoch": 0.7479045262233576, "grad_norm": 0.2864322576915817, "learning_rate": 3.152154557608651e-06, "loss": 0.1732, "step": 9369 }, { "epoch": 0.7479843537958011, "grad_norm": 0.29338285660367713, "learning_rate": 3.1502706438200758e-06, "loss": 0.2158, "step": 9370 }, { "epoch": 0.7480641813682446, "grad_norm": 0.260218488271741, "learning_rate": 3.1483871879088003e-06, "loss": 0.1371, "step": 9371 }, { "epoch": 0.7481440089406881, "grad_norm": 0.30328341202337916, "learning_rate": 3.1465041900007264e-06, "loss": 0.1674, "step": 9372 }, { "epoch": 0.7482238365131316, "grad_norm": 0.37795659272502474, "learning_rate": 3.1446216502217218e-06, "loss": 0.1793, "step": 9373 }, { "epoch": 0.7483036640855751, "grad_norm": 0.3033631749086413, "learning_rate": 3.1427395686976337e-06, "loss": 0.1639, "step": 9374 }, { "epoch": 0.7483834916580187, "grad_norm": 0.3032802321580951, "learning_rate": 3.140857945554269e-06, "loss": 0.1784, "step": 9375 }, { "epoch": 0.7484633192304622, "grad_norm": 0.29721423286823706, "learning_rate": 3.1389767809174053e-06, "loss": 0.1917, "step": 9376 }, { "epoch": 0.7485431468029057, "grad_norm": 0.31290118618584256, "learning_rate": 3.1370960749127923e-06, "loss": 0.1306, "step": 9377 }, { "epoch": 0.7486229743753492, "grad_norm": 0.29842700989976373, "learning_rate": 3.1352158276661435e-06, "loss": 0.1762, "step": 9378 }, { "epoch": 0.7487028019477928, "grad_norm": 0.2977771663128387, "learning_rate": 3.1333360393031566e-06, "loss": 0.1844, "step": 9379 }, { "epoch": 0.7487826295202363, "grad_norm": 0.28803664419257224, "learning_rate": 3.1314567099494752e-06, "loss": 0.1714, "step": 9380 }, { "epoch": 0.7488624570926798, "grad_norm": 0.2619438240029612, "learning_rate": 3.1295778397307343e-06, "loss": 0.1612, "step": 9381 }, { "epoch": 0.7489422846651234, "grad_norm": 0.28416965041920905, "learning_rate": 3.1276994287725213e-06, "loss": 0.1712, "step": 9382 }, { "epoch": 0.7490221122375669, "grad_norm": 0.2770099461309578, "learning_rate": 3.1258214772004115e-06, "loss": 0.1551, "step": 9383 }, { "epoch": 0.7491019398100104, "grad_norm": 0.285518134294997, "learning_rate": 3.12394398513993e-06, "loss": 0.1691, "step": 9384 }, { "epoch": 0.7491817673824539, "grad_norm": 0.2757675129544256, "learning_rate": 3.122066952716577e-06, "loss": 0.1712, "step": 9385 }, { "epoch": 0.7492615949548974, "grad_norm": 0.26411899058373356, "learning_rate": 3.120190380055833e-06, "loss": 0.1128, "step": 9386 }, { "epoch": 0.7493414225273409, "grad_norm": 0.2717305863394131, "learning_rate": 3.1183142672831356e-06, "loss": 0.1261, "step": 9387 }, { "epoch": 0.7494212500997844, "grad_norm": 0.3170684509308362, "learning_rate": 3.1164386145238967e-06, "loss": 0.2015, "step": 9388 }, { "epoch": 0.749501077672228, "grad_norm": 0.276792065347524, "learning_rate": 3.1145634219034915e-06, "loss": 0.1413, "step": 9389 }, { "epoch": 0.7495809052446715, "grad_norm": 0.26765038742954556, "learning_rate": 3.1126886895472754e-06, "loss": 0.1886, "step": 9390 }, { "epoch": 0.749660732817115, "grad_norm": 0.25630805961556247, "learning_rate": 3.1108144175805653e-06, "loss": 0.1536, "step": 9391 }, { "epoch": 0.7497405603895586, "grad_norm": 0.2918966241512145, "learning_rate": 3.1089406061286475e-06, "loss": 0.1892, "step": 9392 }, { "epoch": 0.7498203879620021, "grad_norm": 0.2871773971351831, "learning_rate": 3.107067255316779e-06, "loss": 0.1297, "step": 9393 }, { "epoch": 0.7499002155344456, "grad_norm": 0.331238149326382, "learning_rate": 3.105194365270184e-06, "loss": 0.2143, "step": 9394 }, { "epoch": 0.7499800431068891, "grad_norm": 0.2796469667890591, "learning_rate": 3.103321936114063e-06, "loss": 0.1708, "step": 9395 }, { "epoch": 0.7500598706793327, "grad_norm": 0.2630953530776384, "learning_rate": 3.1014499679735764e-06, "loss": 0.1596, "step": 9396 }, { "epoch": 0.7501396982517762, "grad_norm": 0.33972200124733254, "learning_rate": 3.0995784609738598e-06, "loss": 0.1793, "step": 9397 }, { "epoch": 0.7502195258242197, "grad_norm": 0.29310145669429283, "learning_rate": 3.0977074152400156e-06, "loss": 0.1935, "step": 9398 }, { "epoch": 0.7502993533966632, "grad_norm": 0.3049370293442647, "learning_rate": 3.095836830897112e-06, "loss": 0.1498, "step": 9399 }, { "epoch": 0.7503791809691067, "grad_norm": 0.33938853196530894, "learning_rate": 3.0939667080701965e-06, "loss": 0.1387, "step": 9400 }, { "epoch": 0.7504590085415502, "grad_norm": 0.3039147820729381, "learning_rate": 3.092097046884277e-06, "loss": 0.1642, "step": 9401 }, { "epoch": 0.7505388361139937, "grad_norm": 0.27529770233624784, "learning_rate": 3.090227847464332e-06, "loss": 0.1537, "step": 9402 }, { "epoch": 0.7506186636864373, "grad_norm": 0.2718048479183037, "learning_rate": 3.088359109935307e-06, "loss": 0.1412, "step": 9403 }, { "epoch": 0.7506984912588808, "grad_norm": 0.278744724701382, "learning_rate": 3.086490834422127e-06, "loss": 0.1863, "step": 9404 }, { "epoch": 0.7507783188313243, "grad_norm": 0.275935057778309, "learning_rate": 3.0846230210496743e-06, "loss": 0.1401, "step": 9405 }, { "epoch": 0.7508581464037679, "grad_norm": 0.31357016320347136, "learning_rate": 3.082755669942806e-06, "loss": 0.153, "step": 9406 }, { "epoch": 0.7509379739762114, "grad_norm": 0.2517106538647137, "learning_rate": 3.0808887812263466e-06, "loss": 0.2064, "step": 9407 }, { "epoch": 0.7510178015486549, "grad_norm": 0.2935532551431425, "learning_rate": 3.0790223550250874e-06, "loss": 0.186, "step": 9408 }, { "epoch": 0.7510976291210985, "grad_norm": 0.3178380659792717, "learning_rate": 3.0771563914637982e-06, "loss": 0.1439, "step": 9409 }, { "epoch": 0.751177456693542, "grad_norm": 0.25217190180087173, "learning_rate": 3.0752908906672072e-06, "loss": 0.1435, "step": 9410 }, { "epoch": 0.7512572842659855, "grad_norm": 0.25246240505542633, "learning_rate": 3.073425852760016e-06, "loss": 0.1672, "step": 9411 }, { "epoch": 0.751337111838429, "grad_norm": 0.2442581278593417, "learning_rate": 3.071561277866891e-06, "loss": 0.1508, "step": 9412 }, { "epoch": 0.7514169394108725, "grad_norm": 0.30429699317497577, "learning_rate": 3.069697166112481e-06, "loss": 0.1385, "step": 9413 }, { "epoch": 0.751496766983316, "grad_norm": 0.32360025641520623, "learning_rate": 3.0678335176213914e-06, "loss": 0.1668, "step": 9414 }, { "epoch": 0.7515765945557595, "grad_norm": 0.275087306800631, "learning_rate": 3.065970332518191e-06, "loss": 0.1425, "step": 9415 }, { "epoch": 0.751656422128203, "grad_norm": 0.3179603665405598, "learning_rate": 3.064107610927437e-06, "loss": 0.1648, "step": 9416 }, { "epoch": 0.7517362497006466, "grad_norm": 0.2659269814151909, "learning_rate": 3.0622453529736375e-06, "loss": 0.1751, "step": 9417 }, { "epoch": 0.7518160772730901, "grad_norm": 0.3444077534647305, "learning_rate": 3.0603835587812823e-06, "loss": 0.1649, "step": 9418 }, { "epoch": 0.7518959048455337, "grad_norm": 0.29437380190480045, "learning_rate": 3.0585222284748253e-06, "loss": 0.1622, "step": 9419 }, { "epoch": 0.7519757324179772, "grad_norm": 0.28709265322564553, "learning_rate": 3.0566613621786855e-06, "loss": 0.1784, "step": 9420 }, { "epoch": 0.7520555599904207, "grad_norm": 0.27288966579258706, "learning_rate": 3.0548009600172567e-06, "loss": 0.1874, "step": 9421 }, { "epoch": 0.7521353875628642, "grad_norm": 0.28868002524100295, "learning_rate": 3.0529410221148958e-06, "loss": 0.1752, "step": 9422 }, { "epoch": 0.7522152151353078, "grad_norm": 0.3170282883348225, "learning_rate": 3.05108154859594e-06, "loss": 0.2044, "step": 9423 }, { "epoch": 0.7522950427077513, "grad_norm": 0.30817815787129715, "learning_rate": 3.0492225395846777e-06, "loss": 0.1695, "step": 9424 }, { "epoch": 0.7523748702801948, "grad_norm": 0.30384371936539534, "learning_rate": 3.0473639952053845e-06, "loss": 0.2188, "step": 9425 }, { "epoch": 0.7524546978526383, "grad_norm": 0.23830305547940073, "learning_rate": 3.045505915582291e-06, "loss": 0.2052, "step": 9426 }, { "epoch": 0.7525345254250818, "grad_norm": 0.29023718780044044, "learning_rate": 3.0436483008396102e-06, "loss": 0.1735, "step": 9427 }, { "epoch": 0.7526143529975253, "grad_norm": 0.3140307023871816, "learning_rate": 3.0417911511015085e-06, "loss": 0.1787, "step": 9428 }, { "epoch": 0.7526941805699688, "grad_norm": 0.2684586584510596, "learning_rate": 3.0399344664921293e-06, "loss": 0.151, "step": 9429 }, { "epoch": 0.7527740081424124, "grad_norm": 0.2797404972513905, "learning_rate": 3.03807824713559e-06, "loss": 0.1571, "step": 9430 }, { "epoch": 0.7528538357148559, "grad_norm": 0.26057809638897084, "learning_rate": 3.0362224931559693e-06, "loss": 0.1339, "step": 9431 }, { "epoch": 0.7529336632872994, "grad_norm": 0.2680840645950162, "learning_rate": 3.0343672046773152e-06, "loss": 0.1404, "step": 9432 }, { "epoch": 0.753013490859743, "grad_norm": 0.2830946673565735, "learning_rate": 3.0325123818236448e-06, "loss": 0.1832, "step": 9433 }, { "epoch": 0.7530933184321865, "grad_norm": 0.32464746603129196, "learning_rate": 3.030658024718951e-06, "loss": 0.1415, "step": 9434 }, { "epoch": 0.75317314600463, "grad_norm": 0.2881885300087177, "learning_rate": 3.0288041334871856e-06, "loss": 0.1782, "step": 9435 }, { "epoch": 0.7532529735770735, "grad_norm": 0.282278834277899, "learning_rate": 3.026950708252281e-06, "loss": 0.1456, "step": 9436 }, { "epoch": 0.7533328011495171, "grad_norm": 0.33873114714908364, "learning_rate": 3.025097749138124e-06, "loss": 0.1632, "step": 9437 }, { "epoch": 0.7534126287219606, "grad_norm": 0.2679378136658794, "learning_rate": 3.0232452562685755e-06, "loss": 0.1724, "step": 9438 }, { "epoch": 0.7534924562944041, "grad_norm": 0.30602082547388754, "learning_rate": 3.0213932297674765e-06, "loss": 0.1222, "step": 9439 }, { "epoch": 0.7535722838668476, "grad_norm": 0.33080643013555205, "learning_rate": 3.0195416697586213e-06, "loss": 0.1875, "step": 9440 }, { "epoch": 0.7536521114392911, "grad_norm": 0.2740923072333613, "learning_rate": 3.0176905763657815e-06, "loss": 0.1582, "step": 9441 }, { "epoch": 0.7537319390117346, "grad_norm": 0.29716072980730074, "learning_rate": 3.015839949712691e-06, "loss": 0.153, "step": 9442 }, { "epoch": 0.7538117665841781, "grad_norm": 0.27446186607574746, "learning_rate": 3.013989789923065e-06, "loss": 0.1606, "step": 9443 }, { "epoch": 0.7538915941566217, "grad_norm": 0.26768697834719857, "learning_rate": 3.012140097120574e-06, "loss": 0.2237, "step": 9444 }, { "epoch": 0.7539714217290652, "grad_norm": 0.2767341164767624, "learning_rate": 3.0102908714288636e-06, "loss": 0.2098, "step": 9445 }, { "epoch": 0.7540512493015087, "grad_norm": 0.277425522245485, "learning_rate": 3.008442112971548e-06, "loss": 0.1633, "step": 9446 }, { "epoch": 0.7541310768739523, "grad_norm": 0.2882318599411335, "learning_rate": 3.0065938218722047e-06, "loss": 0.1791, "step": 9447 }, { "epoch": 0.7542109044463958, "grad_norm": 0.2853867835506935, "learning_rate": 3.0047459982543915e-06, "loss": 0.1778, "step": 9448 }, { "epoch": 0.7542907320188393, "grad_norm": 0.27553845458662574, "learning_rate": 3.0028986422416263e-06, "loss": 0.1392, "step": 9449 }, { "epoch": 0.7543705595912829, "grad_norm": 0.31440953120234144, "learning_rate": 3.0010517539573947e-06, "loss": 0.1654, "step": 9450 }, { "epoch": 0.7544503871637264, "grad_norm": 0.27693944375025914, "learning_rate": 2.999205333525157e-06, "loss": 0.189, "step": 9451 }, { "epoch": 0.7545302147361699, "grad_norm": 0.2738702050842127, "learning_rate": 2.997359381068334e-06, "loss": 0.1641, "step": 9452 }, { "epoch": 0.7546100423086134, "grad_norm": 0.28848957572659695, "learning_rate": 2.995513896710327e-06, "loss": 0.1704, "step": 9453 }, { "epoch": 0.7546898698810569, "grad_norm": 0.33400784057058225, "learning_rate": 2.9936688805744963e-06, "loss": 0.2021, "step": 9454 }, { "epoch": 0.7547696974535004, "grad_norm": 0.2488551520307474, "learning_rate": 2.9918243327841747e-06, "loss": 0.1606, "step": 9455 }, { "epoch": 0.7548495250259439, "grad_norm": 0.2775887793817294, "learning_rate": 2.9899802534626575e-06, "loss": 0.1557, "step": 9456 }, { "epoch": 0.7549293525983874, "grad_norm": 0.29336738860992484, "learning_rate": 2.9881366427332226e-06, "loss": 0.1402, "step": 9457 }, { "epoch": 0.755009180170831, "grad_norm": 0.3002124720717408, "learning_rate": 2.9862935007191075e-06, "loss": 0.1496, "step": 9458 }, { "epoch": 0.7550890077432745, "grad_norm": 0.30536475405408386, "learning_rate": 2.98445082754351e-06, "loss": 0.1584, "step": 9459 }, { "epoch": 0.7551688353157181, "grad_norm": 0.28626649355395295, "learning_rate": 2.9826086233296136e-06, "loss": 0.1619, "step": 9460 }, { "epoch": 0.7552486628881616, "grad_norm": 0.3014061399847116, "learning_rate": 2.9807668882005567e-06, "loss": 0.1505, "step": 9461 }, { "epoch": 0.7553284904606051, "grad_norm": 0.2945777921375378, "learning_rate": 2.978925622279458e-06, "loss": 0.2093, "step": 9462 }, { "epoch": 0.7554083180330486, "grad_norm": 0.24714834073446193, "learning_rate": 2.977084825689397e-06, "loss": 0.1464, "step": 9463 }, { "epoch": 0.7554881456054922, "grad_norm": 0.31422365075678055, "learning_rate": 2.9752444985534223e-06, "loss": 0.1236, "step": 9464 }, { "epoch": 0.7555679731779357, "grad_norm": 0.30341068109879515, "learning_rate": 2.9734046409945506e-06, "loss": 0.1354, "step": 9465 }, { "epoch": 0.7556478007503792, "grad_norm": 0.3087118811285341, "learning_rate": 2.971565253135774e-06, "loss": 0.18, "step": 9466 }, { "epoch": 0.7557276283228227, "grad_norm": 0.26953157483080914, "learning_rate": 2.969726335100048e-06, "loss": 0.1371, "step": 9467 }, { "epoch": 0.7558074558952662, "grad_norm": 0.2690723088890062, "learning_rate": 2.96788788701029e-06, "loss": 0.1897, "step": 9468 }, { "epoch": 0.7558872834677097, "grad_norm": 0.30894082411912654, "learning_rate": 2.9660499089894e-06, "loss": 0.1714, "step": 9469 }, { "epoch": 0.7559671110401532, "grad_norm": 0.31461176208401553, "learning_rate": 2.964212401160235e-06, "loss": 0.1386, "step": 9470 }, { "epoch": 0.7560469386125968, "grad_norm": 0.3000995262482346, "learning_rate": 2.962375363645632e-06, "loss": 0.2033, "step": 9471 }, { "epoch": 0.7561267661850403, "grad_norm": 0.33727807825074374, "learning_rate": 2.9605387965683794e-06, "loss": 0.1182, "step": 9472 }, { "epoch": 0.7562065937574838, "grad_norm": 0.3025039741513638, "learning_rate": 2.9587027000512546e-06, "loss": 0.1512, "step": 9473 }, { "epoch": 0.7562864213299274, "grad_norm": 0.283940855842527, "learning_rate": 2.956867074216988e-06, "loss": 0.1937, "step": 9474 }, { "epoch": 0.7563662489023709, "grad_norm": 0.30337992860283713, "learning_rate": 2.9550319191882848e-06, "loss": 0.1129, "step": 9475 }, { "epoch": 0.7564460764748144, "grad_norm": 0.28334656780220785, "learning_rate": 2.953197235087818e-06, "loss": 0.1564, "step": 9476 }, { "epoch": 0.756525904047258, "grad_norm": 0.3110572837137083, "learning_rate": 2.951363022038225e-06, "loss": 0.1358, "step": 9477 }, { "epoch": 0.7566057316197015, "grad_norm": 0.3036498080310333, "learning_rate": 2.949529280162123e-06, "loss": 0.1836, "step": 9478 }, { "epoch": 0.756685559192145, "grad_norm": 0.3297466538821619, "learning_rate": 2.9476960095820874e-06, "loss": 0.1295, "step": 9479 }, { "epoch": 0.7567653867645885, "grad_norm": 0.27239882959647127, "learning_rate": 2.9458632104206643e-06, "loss": 0.1822, "step": 9480 }, { "epoch": 0.756845214337032, "grad_norm": 0.30130669251367037, "learning_rate": 2.9440308828003696e-06, "loss": 0.2063, "step": 9481 }, { "epoch": 0.7569250419094755, "grad_norm": 0.30994485734127136, "learning_rate": 2.942199026843684e-06, "loss": 0.1422, "step": 9482 }, { "epoch": 0.757004869481919, "grad_norm": 0.32396034577705196, "learning_rate": 2.9403676426730655e-06, "loss": 0.176, "step": 9483 }, { "epoch": 0.7570846970543625, "grad_norm": 0.25359459916531096, "learning_rate": 2.9385367304109325e-06, "loss": 0.1554, "step": 9484 }, { "epoch": 0.7571645246268061, "grad_norm": 0.2674349847495946, "learning_rate": 2.936706290179673e-06, "loss": 0.165, "step": 9485 }, { "epoch": 0.7572443521992496, "grad_norm": 0.2486668294376818, "learning_rate": 2.934876322101643e-06, "loss": 0.1568, "step": 9486 }, { "epoch": 0.7573241797716932, "grad_norm": 0.33506568706046547, "learning_rate": 2.933046826299175e-06, "loss": 0.2134, "step": 9487 }, { "epoch": 0.7574040073441367, "grad_norm": 0.26815935896515175, "learning_rate": 2.931217802894559e-06, "loss": 0.1429, "step": 9488 }, { "epoch": 0.7574838349165802, "grad_norm": 0.2773164712706871, "learning_rate": 2.9293892520100588e-06, "loss": 0.178, "step": 9489 }, { "epoch": 0.7575636624890237, "grad_norm": 0.24025951283207456, "learning_rate": 2.9275611737679055e-06, "loss": 0.1709, "step": 9490 }, { "epoch": 0.7576434900614673, "grad_norm": 0.28383411078789955, "learning_rate": 2.9257335682902964e-06, "loss": 0.1849, "step": 9491 }, { "epoch": 0.7577233176339108, "grad_norm": 0.24533237352548667, "learning_rate": 2.923906435699405e-06, "loss": 0.1832, "step": 9492 }, { "epoch": 0.7578031452063543, "grad_norm": 0.33699665073122775, "learning_rate": 2.922079776117366e-06, "loss": 0.1843, "step": 9493 }, { "epoch": 0.7578829727787978, "grad_norm": 0.24581257020985423, "learning_rate": 2.9202535896662832e-06, "loss": 0.1719, "step": 9494 }, { "epoch": 0.7579628003512413, "grad_norm": 0.3166543798615091, "learning_rate": 2.9184278764682307e-06, "loss": 0.1762, "step": 9495 }, { "epoch": 0.7580426279236848, "grad_norm": 0.2738310542699218, "learning_rate": 2.916602636645246e-06, "loss": 0.1558, "step": 9496 }, { "epoch": 0.7581224554961283, "grad_norm": 0.29714981786917155, "learning_rate": 2.9147778703193475e-06, "loss": 0.1621, "step": 9497 }, { "epoch": 0.7582022830685718, "grad_norm": 0.2755461526721697, "learning_rate": 2.9129535776125085e-06, "loss": 0.1857, "step": 9498 }, { "epoch": 0.7582821106410154, "grad_norm": 0.2924929447030371, "learning_rate": 2.911129758646676e-06, "loss": 0.1506, "step": 9499 }, { "epoch": 0.7583619382134589, "grad_norm": 0.2630696835277658, "learning_rate": 2.9093064135437622e-06, "loss": 0.1689, "step": 9500 }, { "epoch": 0.7584417657859025, "grad_norm": 0.27456368764743055, "learning_rate": 2.9074835424256577e-06, "loss": 0.1284, "step": 9501 }, { "epoch": 0.758521593358346, "grad_norm": 0.34089839058109334, "learning_rate": 2.9056611454142092e-06, "loss": 0.203, "step": 9502 }, { "epoch": 0.7586014209307895, "grad_norm": 0.25759241304205704, "learning_rate": 2.9038392226312397e-06, "loss": 0.1261, "step": 9503 }, { "epoch": 0.758681248503233, "grad_norm": 0.27270196542787445, "learning_rate": 2.9020177741985333e-06, "loss": 0.1569, "step": 9504 }, { "epoch": 0.7587610760756766, "grad_norm": 0.313976563115361, "learning_rate": 2.9001968002378477e-06, "loss": 0.2212, "step": 9505 }, { "epoch": 0.7588409036481201, "grad_norm": 0.2941395851221542, "learning_rate": 2.8983763008709143e-06, "loss": 0.1674, "step": 9506 }, { "epoch": 0.7589207312205636, "grad_norm": 0.26306433271940055, "learning_rate": 2.8965562762194156e-06, "loss": 0.1793, "step": 9507 }, { "epoch": 0.7590005587930071, "grad_norm": 0.29901768937297035, "learning_rate": 2.8947367264050207e-06, "loss": 0.2344, "step": 9508 }, { "epoch": 0.7590803863654506, "grad_norm": 0.2379592549441105, "learning_rate": 2.892917651549355e-06, "loss": 0.1533, "step": 9509 }, { "epoch": 0.7591602139378941, "grad_norm": 0.2516088519651481, "learning_rate": 2.891099051774021e-06, "loss": 0.2017, "step": 9510 }, { "epoch": 0.7592400415103376, "grad_norm": 0.2689940976674729, "learning_rate": 2.8892809272005863e-06, "loss": 0.1592, "step": 9511 }, { "epoch": 0.7593198690827812, "grad_norm": 0.2783234811559232, "learning_rate": 2.887463277950575e-06, "loss": 0.1178, "step": 9512 }, { "epoch": 0.7593996966552247, "grad_norm": 0.28991873971996307, "learning_rate": 2.8856461041454986e-06, "loss": 0.159, "step": 9513 }, { "epoch": 0.7594795242276683, "grad_norm": 0.29665180289306087, "learning_rate": 2.883829405906823e-06, "loss": 0.153, "step": 9514 }, { "epoch": 0.7595593518001118, "grad_norm": 0.256524289560521, "learning_rate": 2.8820131833559963e-06, "loss": 0.128, "step": 9515 }, { "epoch": 0.7596391793725553, "grad_norm": 0.26740562494044445, "learning_rate": 2.8801974366144135e-06, "loss": 0.1476, "step": 9516 }, { "epoch": 0.7597190069449988, "grad_norm": 0.3008721212411244, "learning_rate": 2.8783821658034584e-06, "loss": 0.1608, "step": 9517 }, { "epoch": 0.7597988345174423, "grad_norm": 0.2713750082121554, "learning_rate": 2.876567371044473e-06, "loss": 0.1585, "step": 9518 }, { "epoch": 0.7598786620898859, "grad_norm": 0.25636041126990955, "learning_rate": 2.8747530524587674e-06, "loss": 0.1431, "step": 9519 }, { "epoch": 0.7599584896623294, "grad_norm": 0.2839986938042362, "learning_rate": 2.8729392101676234e-06, "loss": 0.1783, "step": 9520 }, { "epoch": 0.7600383172347729, "grad_norm": 0.24374921887754675, "learning_rate": 2.8711258442922853e-06, "loss": 0.1577, "step": 9521 }, { "epoch": 0.7601181448072164, "grad_norm": 0.26982791259455496, "learning_rate": 2.8693129549539755e-06, "loss": 0.1456, "step": 9522 }, { "epoch": 0.7601979723796599, "grad_norm": 0.2811167614742351, "learning_rate": 2.8675005422738755e-06, "loss": 0.1289, "step": 9523 }, { "epoch": 0.7602777999521034, "grad_norm": 0.26058934894955355, "learning_rate": 2.8656886063731382e-06, "loss": 0.2099, "step": 9524 }, { "epoch": 0.7603576275245469, "grad_norm": 0.3032207856908676, "learning_rate": 2.8638771473728843e-06, "loss": 0.181, "step": 9525 }, { "epoch": 0.7604374550969905, "grad_norm": 0.3011119109365231, "learning_rate": 2.862066165394198e-06, "loss": 0.1636, "step": 9526 }, { "epoch": 0.760517282669434, "grad_norm": 0.26855677652354004, "learning_rate": 2.8602556605581456e-06, "loss": 0.1511, "step": 9527 }, { "epoch": 0.7605971102418776, "grad_norm": 0.3149299533396439, "learning_rate": 2.8584456329857456e-06, "loss": 0.1661, "step": 9528 }, { "epoch": 0.7606769378143211, "grad_norm": 0.2600509585591936, "learning_rate": 2.8566360827979945e-06, "loss": 0.1295, "step": 9529 }, { "epoch": 0.7607567653867646, "grad_norm": 0.28375713141125675, "learning_rate": 2.854827010115847e-06, "loss": 0.1649, "step": 9530 }, { "epoch": 0.7608365929592081, "grad_norm": 0.2856135138393289, "learning_rate": 2.8530184150602423e-06, "loss": 0.1354, "step": 9531 }, { "epoch": 0.7609164205316516, "grad_norm": 0.38762908360793924, "learning_rate": 2.8512102977520715e-06, "loss": 0.1854, "step": 9532 }, { "epoch": 0.7609962481040952, "grad_norm": 0.3119545678096161, "learning_rate": 2.8494026583122027e-06, "loss": 0.1993, "step": 9533 }, { "epoch": 0.7610760756765387, "grad_norm": 0.26043246594866043, "learning_rate": 2.8475954968614683e-06, "loss": 0.158, "step": 9534 }, { "epoch": 0.7611559032489822, "grad_norm": 0.28467261591524023, "learning_rate": 2.8457888135206655e-06, "loss": 0.1842, "step": 9535 }, { "epoch": 0.7612357308214257, "grad_norm": 0.2643577808517535, "learning_rate": 2.8439826084105728e-06, "loss": 0.1352, "step": 9536 }, { "epoch": 0.7613155583938692, "grad_norm": 0.2884995247568513, "learning_rate": 2.8421768816519223e-06, "loss": 0.2043, "step": 9537 }, { "epoch": 0.7613953859663127, "grad_norm": 0.2948782624776125, "learning_rate": 2.840371633365421e-06, "loss": 0.2, "step": 9538 }, { "epoch": 0.7614752135387562, "grad_norm": 0.27013342782329663, "learning_rate": 2.838566863671739e-06, "loss": 0.1462, "step": 9539 }, { "epoch": 0.7615550411111998, "grad_norm": 0.3133951710371597, "learning_rate": 2.8367625726915248e-06, "loss": 0.1527, "step": 9540 }, { "epoch": 0.7616348686836434, "grad_norm": 0.33642829222844994, "learning_rate": 2.8349587605453853e-06, "loss": 0.1585, "step": 9541 }, { "epoch": 0.7617146962560869, "grad_norm": 0.2779043508915947, "learning_rate": 2.8331554273538963e-06, "loss": 0.1589, "step": 9542 }, { "epoch": 0.7617945238285304, "grad_norm": 0.29265439229259366, "learning_rate": 2.8313525732376058e-06, "loss": 0.1361, "step": 9543 }, { "epoch": 0.7618743514009739, "grad_norm": 0.3299506099830186, "learning_rate": 2.8295501983170228e-06, "loss": 0.1516, "step": 9544 }, { "epoch": 0.7619541789734174, "grad_norm": 0.32406974846003767, "learning_rate": 2.8277483027126352e-06, "loss": 0.1543, "step": 9545 }, { "epoch": 0.762034006545861, "grad_norm": 0.3262947675687775, "learning_rate": 2.825946886544891e-06, "loss": 0.1573, "step": 9546 }, { "epoch": 0.7621138341183045, "grad_norm": 0.30038042701505074, "learning_rate": 2.824145949934206e-06, "loss": 0.1449, "step": 9547 }, { "epoch": 0.762193661690748, "grad_norm": 0.3028850716525007, "learning_rate": 2.8223454930009663e-06, "loss": 0.1421, "step": 9548 }, { "epoch": 0.7622734892631915, "grad_norm": 0.3446414868653061, "learning_rate": 2.820545515865523e-06, "loss": 0.2147, "step": 9549 }, { "epoch": 0.762353316835635, "grad_norm": 0.2852630321386194, "learning_rate": 2.818746018648205e-06, "loss": 0.1753, "step": 9550 }, { "epoch": 0.7624331444080785, "grad_norm": 0.26519765901667053, "learning_rate": 2.816947001469291e-06, "loss": 0.1456, "step": 9551 }, { "epoch": 0.762512971980522, "grad_norm": 0.2918621512389998, "learning_rate": 2.8151484644490456e-06, "loss": 0.1185, "step": 9552 }, { "epoch": 0.7625927995529656, "grad_norm": 0.2729652769430132, "learning_rate": 2.81335040770769e-06, "loss": 0.1498, "step": 9553 }, { "epoch": 0.7626726271254091, "grad_norm": 0.3044519101360998, "learning_rate": 2.811552831365422e-06, "loss": 0.1634, "step": 9554 }, { "epoch": 0.7627524546978527, "grad_norm": 0.2806408414554555, "learning_rate": 2.809755735542402e-06, "loss": 0.1942, "step": 9555 }, { "epoch": 0.7628322822702962, "grad_norm": 0.31231630647965974, "learning_rate": 2.807959120358751e-06, "loss": 0.1872, "step": 9556 }, { "epoch": 0.7629121098427397, "grad_norm": 0.2983895089218492, "learning_rate": 2.8061629859345727e-06, "loss": 0.2209, "step": 9557 }, { "epoch": 0.7629919374151832, "grad_norm": 0.2677061958450149, "learning_rate": 2.804367332389927e-06, "loss": 0.1475, "step": 9558 }, { "epoch": 0.7630717649876267, "grad_norm": 0.31630806818962637, "learning_rate": 2.802572159844855e-06, "loss": 0.1514, "step": 9559 }, { "epoch": 0.7631515925600703, "grad_norm": 0.33599698676789425, "learning_rate": 2.8007774684193456e-06, "loss": 0.1835, "step": 9560 }, { "epoch": 0.7632314201325138, "grad_norm": 0.325300480870564, "learning_rate": 2.7989832582333744e-06, "loss": 0.135, "step": 9561 }, { "epoch": 0.7633112477049573, "grad_norm": 0.29403472365342254, "learning_rate": 2.7971895294068754e-06, "loss": 0.1782, "step": 9562 }, { "epoch": 0.7633910752774008, "grad_norm": 0.26284393275618206, "learning_rate": 2.795396282059751e-06, "loss": 0.1981, "step": 9563 }, { "epoch": 0.7634709028498443, "grad_norm": 0.30770535441338176, "learning_rate": 2.793603516311875e-06, "loss": 0.1279, "step": 9564 }, { "epoch": 0.7635507304222878, "grad_norm": 0.28108332659435054, "learning_rate": 2.7918112322830803e-06, "loss": 0.1664, "step": 9565 }, { "epoch": 0.7636305579947313, "grad_norm": 0.30852507600181844, "learning_rate": 2.7900194300931825e-06, "loss": 0.1321, "step": 9566 }, { "epoch": 0.7637103855671749, "grad_norm": 0.27264550849413116, "learning_rate": 2.7882281098619535e-06, "loss": 0.1418, "step": 9567 }, { "epoch": 0.7637902131396185, "grad_norm": 0.30428305924977095, "learning_rate": 2.7864372717091346e-06, "loss": 0.1342, "step": 9568 }, { "epoch": 0.763870040712062, "grad_norm": 0.267723983504181, "learning_rate": 2.7846469157544353e-06, "loss": 0.1376, "step": 9569 }, { "epoch": 0.7639498682845055, "grad_norm": 0.30478860659871565, "learning_rate": 2.782857042117537e-06, "loss": 0.149, "step": 9570 }, { "epoch": 0.764029695856949, "grad_norm": 0.2667876912652683, "learning_rate": 2.7810676509180843e-06, "loss": 0.1436, "step": 9571 }, { "epoch": 0.7641095234293925, "grad_norm": 0.24207174203817317, "learning_rate": 2.779278742275692e-06, "loss": 0.1462, "step": 9572 }, { "epoch": 0.764189351001836, "grad_norm": 0.25163107628035397, "learning_rate": 2.7774903163099388e-06, "loss": 0.141, "step": 9573 }, { "epoch": 0.7642691785742796, "grad_norm": 0.32255111141839754, "learning_rate": 2.7757023731403732e-06, "loss": 0.162, "step": 9574 }, { "epoch": 0.7643490061467231, "grad_norm": 0.3214273201327758, "learning_rate": 2.773914912886517e-06, "loss": 0.1551, "step": 9575 }, { "epoch": 0.7644288337191666, "grad_norm": 0.3211641645910337, "learning_rate": 2.772127935667853e-06, "loss": 0.1747, "step": 9576 }, { "epoch": 0.7645086612916101, "grad_norm": 0.29396732631902184, "learning_rate": 2.7703414416038323e-06, "loss": 0.1427, "step": 9577 }, { "epoch": 0.7645884888640536, "grad_norm": 0.3439272940357434, "learning_rate": 2.768555430813875e-06, "loss": 0.1506, "step": 9578 }, { "epoch": 0.7646683164364971, "grad_norm": 0.24624748937916646, "learning_rate": 2.7667699034173667e-06, "loss": 0.1116, "step": 9579 }, { "epoch": 0.7647481440089406, "grad_norm": 0.25465703278965346, "learning_rate": 2.764984859533668e-06, "loss": 0.1661, "step": 9580 }, { "epoch": 0.7648279715813842, "grad_norm": 0.283618133698028, "learning_rate": 2.7632002992821005e-06, "loss": 0.1248, "step": 9581 }, { "epoch": 0.7649077991538278, "grad_norm": 0.3516078127842286, "learning_rate": 2.761416222781953e-06, "loss": 0.1565, "step": 9582 }, { "epoch": 0.7649876267262713, "grad_norm": 0.27815348055754285, "learning_rate": 2.759632630152481e-06, "loss": 0.1339, "step": 9583 }, { "epoch": 0.7650674542987148, "grad_norm": 0.28058870966683186, "learning_rate": 2.7578495215129196e-06, "loss": 0.1359, "step": 9584 }, { "epoch": 0.7651472818711583, "grad_norm": 0.24358210189352753, "learning_rate": 2.7560668969824557e-06, "loss": 0.1918, "step": 9585 }, { "epoch": 0.7652271094436018, "grad_norm": 0.34563042535669336, "learning_rate": 2.7542847566802532e-06, "loss": 0.1223, "step": 9586 }, { "epoch": 0.7653069370160454, "grad_norm": 0.26139683376023204, "learning_rate": 2.75250310072544e-06, "loss": 0.1163, "step": 9587 }, { "epoch": 0.7653867645884889, "grad_norm": 0.2529827273069801, "learning_rate": 2.7507219292371112e-06, "loss": 0.1776, "step": 9588 }, { "epoch": 0.7654665921609324, "grad_norm": 0.2760946062867245, "learning_rate": 2.7489412423343344e-06, "loss": 0.1451, "step": 9589 }, { "epoch": 0.7655464197333759, "grad_norm": 0.291159946149068, "learning_rate": 2.7471610401361413e-06, "loss": 0.1781, "step": 9590 }, { "epoch": 0.7656262473058194, "grad_norm": 0.27213634987211327, "learning_rate": 2.745381322761529e-06, "loss": 0.1673, "step": 9591 }, { "epoch": 0.7657060748782629, "grad_norm": 0.2614590791766867, "learning_rate": 2.743602090329467e-06, "loss": 0.143, "step": 9592 }, { "epoch": 0.7657859024507064, "grad_norm": 0.2818992469918115, "learning_rate": 2.741823342958885e-06, "loss": 0.1656, "step": 9593 }, { "epoch": 0.76586573002315, "grad_norm": 0.29656294680430867, "learning_rate": 2.740045080768694e-06, "loss": 0.1515, "step": 9594 }, { "epoch": 0.7659455575955935, "grad_norm": 0.2941949180843554, "learning_rate": 2.738267303877754e-06, "loss": 0.093, "step": 9595 }, { "epoch": 0.7660253851680371, "grad_norm": 0.2883733133364269, "learning_rate": 2.7364900124049086e-06, "loss": 0.2014, "step": 9596 }, { "epoch": 0.7661052127404806, "grad_norm": 0.2901471329082452, "learning_rate": 2.7347132064689595e-06, "loss": 0.1486, "step": 9597 }, { "epoch": 0.7661850403129241, "grad_norm": 0.26921581710174197, "learning_rate": 2.7329368861886862e-06, "loss": 0.2082, "step": 9598 }, { "epoch": 0.7662648678853676, "grad_norm": 0.3132100067325081, "learning_rate": 2.731161051682817e-06, "loss": 0.1571, "step": 9599 }, { "epoch": 0.7663446954578111, "grad_norm": 0.25510036987458623, "learning_rate": 2.7293857030700687e-06, "loss": 0.1298, "step": 9600 }, { "epoch": 0.7664245230302547, "grad_norm": 0.25947975299461695, "learning_rate": 2.727610840469114e-06, "loss": 0.1136, "step": 9601 }, { "epoch": 0.7665043506026982, "grad_norm": 0.28942312554247296, "learning_rate": 2.7258364639985913e-06, "loss": 0.1865, "step": 9602 }, { "epoch": 0.7665841781751417, "grad_norm": 0.23769977759989286, "learning_rate": 2.7240625737771197e-06, "loss": 0.2204, "step": 9603 }, { "epoch": 0.7666640057475852, "grad_norm": 0.30707424715754283, "learning_rate": 2.7222891699232644e-06, "loss": 0.1743, "step": 9604 }, { "epoch": 0.7667438333200287, "grad_norm": 0.350639203157808, "learning_rate": 2.7205162525555817e-06, "loss": 0.1854, "step": 9605 }, { "epoch": 0.7668236608924722, "grad_norm": 0.27060732870192317, "learning_rate": 2.7187438217925754e-06, "loss": 0.1514, "step": 9606 }, { "epoch": 0.7669034884649157, "grad_norm": 0.26493689519702096, "learning_rate": 2.7169718777527352e-06, "loss": 0.1957, "step": 9607 }, { "epoch": 0.7669833160373593, "grad_norm": 0.25994844402642975, "learning_rate": 2.7152004205545013e-06, "loss": 0.1583, "step": 9608 }, { "epoch": 0.7670631436098029, "grad_norm": 0.3083157425815438, "learning_rate": 2.713429450316286e-06, "loss": 0.1549, "step": 9609 }, { "epoch": 0.7671429711822464, "grad_norm": 0.24567875270678782, "learning_rate": 2.7116589671564797e-06, "loss": 0.1524, "step": 9610 }, { "epoch": 0.7672227987546899, "grad_norm": 0.24745788190865148, "learning_rate": 2.7098889711934284e-06, "loss": 0.1483, "step": 9611 }, { "epoch": 0.7673026263271334, "grad_norm": 0.3029328557785738, "learning_rate": 2.70811946254545e-06, "loss": 0.167, "step": 9612 }, { "epoch": 0.7673824538995769, "grad_norm": 0.30110034160103516, "learning_rate": 2.706350441330824e-06, "loss": 0.1868, "step": 9613 }, { "epoch": 0.7674622814720204, "grad_norm": 0.2576813024454672, "learning_rate": 2.7045819076678105e-06, "loss": 0.1428, "step": 9614 }, { "epoch": 0.767542109044464, "grad_norm": 0.30572083724004334, "learning_rate": 2.7028138616746257e-06, "loss": 0.1604, "step": 9615 }, { "epoch": 0.7676219366169075, "grad_norm": 0.29583739455086083, "learning_rate": 2.7010463034694566e-06, "loss": 0.1686, "step": 9616 }, { "epoch": 0.767701764189351, "grad_norm": 0.3051428534359296, "learning_rate": 2.6992792331704566e-06, "loss": 0.1758, "step": 9617 }, { "epoch": 0.7677815917617945, "grad_norm": 0.2636660150979627, "learning_rate": 2.697512650895744e-06, "loss": 0.1846, "step": 9618 }, { "epoch": 0.767861419334238, "grad_norm": 0.32135670536320915, "learning_rate": 2.6957465567634157e-06, "loss": 0.1781, "step": 9619 }, { "epoch": 0.7679412469066815, "grad_norm": 0.26730602826312766, "learning_rate": 2.693980950891524e-06, "loss": 0.1562, "step": 9620 }, { "epoch": 0.768021074479125, "grad_norm": 0.29273978381530413, "learning_rate": 2.692215833398092e-06, "loss": 0.1483, "step": 9621 }, { "epoch": 0.7681009020515686, "grad_norm": 0.3028556413772501, "learning_rate": 2.6904512044011124e-06, "loss": 0.154, "step": 9622 }, { "epoch": 0.7681807296240122, "grad_norm": 0.32651686135027036, "learning_rate": 2.6886870640185393e-06, "loss": 0.2125, "step": 9623 }, { "epoch": 0.7682605571964557, "grad_norm": 0.29517269260786205, "learning_rate": 2.686923412368305e-06, "loss": 0.1271, "step": 9624 }, { "epoch": 0.7683403847688992, "grad_norm": 0.3390627856818874, "learning_rate": 2.6851602495683005e-06, "loss": 0.132, "step": 9625 }, { "epoch": 0.7684202123413427, "grad_norm": 0.2626662210300836, "learning_rate": 2.683397575736384e-06, "loss": 0.1435, "step": 9626 }, { "epoch": 0.7685000399137862, "grad_norm": 0.340969134963012, "learning_rate": 2.6816353909903824e-06, "loss": 0.1688, "step": 9627 }, { "epoch": 0.7685798674862298, "grad_norm": 0.2868611563777659, "learning_rate": 2.6798736954480965e-06, "loss": 0.1504, "step": 9628 }, { "epoch": 0.7686596950586733, "grad_norm": 0.303445307245539, "learning_rate": 2.678112489227285e-06, "loss": 0.1413, "step": 9629 }, { "epoch": 0.7687395226311168, "grad_norm": 0.2699432100519335, "learning_rate": 2.6763517724456777e-06, "loss": 0.1586, "step": 9630 }, { "epoch": 0.7688193502035603, "grad_norm": 0.2694361812944914, "learning_rate": 2.6745915452209713e-06, "loss": 0.1886, "step": 9631 }, { "epoch": 0.7688991777760038, "grad_norm": 0.3227028126317659, "learning_rate": 2.672831807670827e-06, "loss": 0.1832, "step": 9632 }, { "epoch": 0.7689790053484473, "grad_norm": 0.2951643623562509, "learning_rate": 2.671072559912884e-06, "loss": 0.1852, "step": 9633 }, { "epoch": 0.7690588329208908, "grad_norm": 0.2813193949500945, "learning_rate": 2.6693138020647357e-06, "loss": 0.1619, "step": 9634 }, { "epoch": 0.7691386604933343, "grad_norm": 0.3385233506501638, "learning_rate": 2.6675555342439497e-06, "loss": 0.1231, "step": 9635 }, { "epoch": 0.769218488065778, "grad_norm": 0.2900706462772254, "learning_rate": 2.665797756568055e-06, "loss": 0.1258, "step": 9636 }, { "epoch": 0.7692983156382215, "grad_norm": 0.26567282276767723, "learning_rate": 2.6640404691545585e-06, "loss": 0.132, "step": 9637 }, { "epoch": 0.769378143210665, "grad_norm": 0.29655378199497634, "learning_rate": 2.6622836721209287e-06, "loss": 0.1772, "step": 9638 }, { "epoch": 0.7694579707831085, "grad_norm": 0.34247422318151843, "learning_rate": 2.660527365584592e-06, "loss": 0.1672, "step": 9639 }, { "epoch": 0.769537798355552, "grad_norm": 0.2852506413100592, "learning_rate": 2.6587715496629573e-06, "loss": 0.165, "step": 9640 }, { "epoch": 0.7696176259279955, "grad_norm": 0.26065670666256974, "learning_rate": 2.6570162244733886e-06, "loss": 0.1386, "step": 9641 }, { "epoch": 0.769697453500439, "grad_norm": 0.2626482045446859, "learning_rate": 2.6552613901332338e-06, "loss": 0.1813, "step": 9642 }, { "epoch": 0.7697772810728826, "grad_norm": 0.2715574225031321, "learning_rate": 2.6535070467597814e-06, "loss": 0.2029, "step": 9643 }, { "epoch": 0.7698571086453261, "grad_norm": 0.29143671682764755, "learning_rate": 2.6517531944703134e-06, "loss": 0.192, "step": 9644 }, { "epoch": 0.7699369362177696, "grad_norm": 0.30995902311468027, "learning_rate": 2.649999833382064e-06, "loss": 0.154, "step": 9645 }, { "epoch": 0.7700167637902131, "grad_norm": 0.2794935805919861, "learning_rate": 2.648246963612239e-06, "loss": 0.1573, "step": 9646 }, { "epoch": 0.7700965913626566, "grad_norm": 0.27042814303496693, "learning_rate": 2.64649458527801e-06, "loss": 0.1386, "step": 9647 }, { "epoch": 0.7701764189351001, "grad_norm": 0.27356025381249544, "learning_rate": 2.644742698496514e-06, "loss": 0.1644, "step": 9648 }, { "epoch": 0.7702562465075437, "grad_norm": 0.2998545552658451, "learning_rate": 2.642991303384864e-06, "loss": 0.1446, "step": 9649 }, { "epoch": 0.7703360740799873, "grad_norm": 0.30752522598027926, "learning_rate": 2.6412404000601277e-06, "loss": 0.2017, "step": 9650 }, { "epoch": 0.7704159016524308, "grad_norm": 0.2976086151959518, "learning_rate": 2.6394899886393544e-06, "loss": 0.1593, "step": 9651 }, { "epoch": 0.7704957292248743, "grad_norm": 0.3249583740833502, "learning_rate": 2.637740069239544e-06, "loss": 0.1285, "step": 9652 }, { "epoch": 0.7705755567973178, "grad_norm": 0.2862942884482053, "learning_rate": 2.6359906419776713e-06, "loss": 0.2092, "step": 9653 }, { "epoch": 0.7706553843697613, "grad_norm": 0.3120199133019168, "learning_rate": 2.634241706970685e-06, "loss": 0.1447, "step": 9654 }, { "epoch": 0.7707352119422048, "grad_norm": 0.29402574046227214, "learning_rate": 2.632493264335492e-06, "loss": 0.1567, "step": 9655 }, { "epoch": 0.7708150395146484, "grad_norm": 0.25898363760052756, "learning_rate": 2.630745314188966e-06, "loss": 0.1293, "step": 9656 }, { "epoch": 0.7708948670870919, "grad_norm": 0.25604876895890133, "learning_rate": 2.6289978566479514e-06, "loss": 0.1499, "step": 9657 }, { "epoch": 0.7709746946595354, "grad_norm": 0.258247324731724, "learning_rate": 2.6272508918292614e-06, "loss": 0.1395, "step": 9658 }, { "epoch": 0.7710545222319789, "grad_norm": 0.2865350161214061, "learning_rate": 2.6255044198496724e-06, "loss": 0.176, "step": 9659 }, { "epoch": 0.7711343498044224, "grad_norm": 0.3057013684271598, "learning_rate": 2.6237584408259297e-06, "loss": 0.1418, "step": 9660 }, { "epoch": 0.7712141773768659, "grad_norm": 0.28917645660100544, "learning_rate": 2.6220129548747443e-06, "loss": 0.1574, "step": 9661 }, { "epoch": 0.7712940049493094, "grad_norm": 0.27705731770036723, "learning_rate": 2.6202679621127915e-06, "loss": 0.1574, "step": 9662 }, { "epoch": 0.7713738325217531, "grad_norm": 0.25435305917680145, "learning_rate": 2.6185234626567235e-06, "loss": 0.1355, "step": 9663 }, { "epoch": 0.7714536600941966, "grad_norm": 0.3958501149599602, "learning_rate": 2.616779456623151e-06, "loss": 0.159, "step": 9664 }, { "epoch": 0.7715334876666401, "grad_norm": 0.3050297789946962, "learning_rate": 2.615035944128653e-06, "loss": 0.1445, "step": 9665 }, { "epoch": 0.7716133152390836, "grad_norm": 0.27549538568817, "learning_rate": 2.6132929252897743e-06, "loss": 0.1155, "step": 9666 }, { "epoch": 0.7716931428115271, "grad_norm": 0.2913959340390773, "learning_rate": 2.611550400223034e-06, "loss": 0.1339, "step": 9667 }, { "epoch": 0.7717729703839706, "grad_norm": 0.2786867319179721, "learning_rate": 2.6098083690449094e-06, "loss": 0.1493, "step": 9668 }, { "epoch": 0.7718527979564141, "grad_norm": 0.26079633298345983, "learning_rate": 2.6080668318718503e-06, "loss": 0.1264, "step": 9669 }, { "epoch": 0.7719326255288577, "grad_norm": 0.28179062538241345, "learning_rate": 2.60632578882027e-06, "loss": 0.1418, "step": 9670 }, { "epoch": 0.7720124531013012, "grad_norm": 0.2709765903528494, "learning_rate": 2.6045852400065474e-06, "loss": 0.1516, "step": 9671 }, { "epoch": 0.7720922806737447, "grad_norm": 0.26195027775677987, "learning_rate": 2.6028451855470382e-06, "loss": 0.1242, "step": 9672 }, { "epoch": 0.7721721082461882, "grad_norm": 0.2645661866117952, "learning_rate": 2.601105625558055e-06, "loss": 0.1359, "step": 9673 }, { "epoch": 0.7722519358186317, "grad_norm": 0.30849510039549455, "learning_rate": 2.59936656015588e-06, "loss": 0.147, "step": 9674 }, { "epoch": 0.7723317633910752, "grad_norm": 0.3229102721031648, "learning_rate": 2.597627989456762e-06, "loss": 0.1753, "step": 9675 }, { "epoch": 0.7724115909635187, "grad_norm": 0.3101350467141324, "learning_rate": 2.5958899135769177e-06, "loss": 0.1417, "step": 9676 }, { "epoch": 0.7724914185359624, "grad_norm": 0.27260503393279223, "learning_rate": 2.5941523326325337e-06, "loss": 0.1841, "step": 9677 }, { "epoch": 0.7725712461084059, "grad_norm": 0.3341865710501151, "learning_rate": 2.5924152467397566e-06, "loss": 0.1312, "step": 9678 }, { "epoch": 0.7726510736808494, "grad_norm": 0.2648971833382809, "learning_rate": 2.5906786560147067e-06, "loss": 0.1473, "step": 9679 }, { "epoch": 0.7727309012532929, "grad_norm": 0.27367578989783503, "learning_rate": 2.5889425605734643e-06, "loss": 0.1334, "step": 9680 }, { "epoch": 0.7728107288257364, "grad_norm": 0.28058969050416743, "learning_rate": 2.5872069605320847e-06, "loss": 0.1609, "step": 9681 }, { "epoch": 0.7728905563981799, "grad_norm": 0.2877759095203474, "learning_rate": 2.5854718560065883e-06, "loss": 0.1887, "step": 9682 }, { "epoch": 0.7729703839706235, "grad_norm": 0.3195440603078107, "learning_rate": 2.5837372471129498e-06, "loss": 0.1691, "step": 9683 }, { "epoch": 0.773050211543067, "grad_norm": 0.2714271656187888, "learning_rate": 2.5820031339671294e-06, "loss": 0.1645, "step": 9684 }, { "epoch": 0.7731300391155105, "grad_norm": 0.2771202122218905, "learning_rate": 2.58026951668504e-06, "loss": 0.1543, "step": 9685 }, { "epoch": 0.773209866687954, "grad_norm": 0.2621566791679223, "learning_rate": 2.578536395382577e-06, "loss": 0.1793, "step": 9686 }, { "epoch": 0.7732896942603975, "grad_norm": 0.2996216969445446, "learning_rate": 2.576803770175581e-06, "loss": 0.1414, "step": 9687 }, { "epoch": 0.773369521832841, "grad_norm": 0.27356042028298766, "learning_rate": 2.575071641179878e-06, "loss": 0.1392, "step": 9688 }, { "epoch": 0.7734493494052845, "grad_norm": 0.29242190866623496, "learning_rate": 2.5733400085112524e-06, "loss": 0.1596, "step": 9689 }, { "epoch": 0.7735291769777282, "grad_norm": 0.3070650598763928, "learning_rate": 2.571608872285457e-06, "loss": 0.1604, "step": 9690 }, { "epoch": 0.7736090045501717, "grad_norm": 0.2976441175241644, "learning_rate": 2.5698782326182116e-06, "loss": 0.1319, "step": 9691 }, { "epoch": 0.7736888321226152, "grad_norm": 0.2911973426625666, "learning_rate": 2.5681480896251987e-06, "loss": 0.1647, "step": 9692 }, { "epoch": 0.7737686596950587, "grad_norm": 0.30061013235839446, "learning_rate": 2.5664184434220784e-06, "loss": 0.1622, "step": 9693 }, { "epoch": 0.7738484872675022, "grad_norm": 0.24661014617904842, "learning_rate": 2.564689294124464e-06, "loss": 0.1903, "step": 9694 }, { "epoch": 0.7739283148399457, "grad_norm": 0.22232844699071294, "learning_rate": 2.5629606418479525e-06, "loss": 0.116, "step": 9695 }, { "epoch": 0.7740081424123892, "grad_norm": 0.3013321155821568, "learning_rate": 2.561232486708086e-06, "loss": 0.1548, "step": 9696 }, { "epoch": 0.7740879699848328, "grad_norm": 0.2676775398257772, "learning_rate": 2.5595048288203874e-06, "loss": 0.1905, "step": 9697 }, { "epoch": 0.7741677975572763, "grad_norm": 0.3373205113243225, "learning_rate": 2.5577776683003488e-06, "loss": 0.1607, "step": 9698 }, { "epoch": 0.7742476251297198, "grad_norm": 0.2826288410381785, "learning_rate": 2.5560510052634213e-06, "loss": 0.1347, "step": 9699 }, { "epoch": 0.7743274527021633, "grad_norm": 0.3293819169509645, "learning_rate": 2.5543248398250243e-06, "loss": 0.1255, "step": 9700 }, { "epoch": 0.7744072802746068, "grad_norm": 0.3420754312717753, "learning_rate": 2.552599172100544e-06, "loss": 0.1831, "step": 9701 }, { "epoch": 0.7744871078470503, "grad_norm": 0.2905751423949018, "learning_rate": 2.550874002205339e-06, "loss": 0.1241, "step": 9702 }, { "epoch": 0.7745669354194938, "grad_norm": 0.3039415117523565, "learning_rate": 2.5491493302547277e-06, "loss": 0.1536, "step": 9703 }, { "epoch": 0.7746467629919375, "grad_norm": 0.2717434754609104, "learning_rate": 2.5474251563639973e-06, "loss": 0.1975, "step": 9704 }, { "epoch": 0.774726590564381, "grad_norm": 0.3558067422944459, "learning_rate": 2.5457014806484028e-06, "loss": 0.1648, "step": 9705 }, { "epoch": 0.7748064181368245, "grad_norm": 0.24960504348294935, "learning_rate": 2.5439783032231613e-06, "loss": 0.1668, "step": 9706 }, { "epoch": 0.774886245709268, "grad_norm": 0.3148179978222706, "learning_rate": 2.542255624203467e-06, "loss": 0.1655, "step": 9707 }, { "epoch": 0.7749660732817115, "grad_norm": 0.3079031714565233, "learning_rate": 2.5405334437044705e-06, "loss": 0.1412, "step": 9708 }, { "epoch": 0.775045900854155, "grad_norm": 0.3061697395762972, "learning_rate": 2.538811761841292e-06, "loss": 0.1557, "step": 9709 }, { "epoch": 0.7751257284265985, "grad_norm": 0.2947290668893285, "learning_rate": 2.537090578729019e-06, "loss": 0.1682, "step": 9710 }, { "epoch": 0.7752055559990421, "grad_norm": 0.35648291158236506, "learning_rate": 2.5353698944827086e-06, "loss": 0.1924, "step": 9711 }, { "epoch": 0.7752853835714856, "grad_norm": 0.29579437197029174, "learning_rate": 2.533649709217381e-06, "loss": 0.1705, "step": 9712 }, { "epoch": 0.7753652111439291, "grad_norm": 0.3164660047819889, "learning_rate": 2.5319300230480213e-06, "loss": 0.1762, "step": 9713 }, { "epoch": 0.7754450387163726, "grad_norm": 0.32655147453369426, "learning_rate": 2.530210836089586e-06, "loss": 0.182, "step": 9714 }, { "epoch": 0.7755248662888161, "grad_norm": 0.30680229469227244, "learning_rate": 2.5284921484569924e-06, "loss": 0.1581, "step": 9715 }, { "epoch": 0.7756046938612596, "grad_norm": 0.2977373789527671, "learning_rate": 2.526773960265134e-06, "loss": 0.1599, "step": 9716 }, { "epoch": 0.7756845214337033, "grad_norm": 0.2846438439928007, "learning_rate": 2.525056271628862e-06, "loss": 0.1543, "step": 9717 }, { "epoch": 0.7757643490061468, "grad_norm": 0.2879562755787633, "learning_rate": 2.5233390826629957e-06, "loss": 0.1624, "step": 9718 }, { "epoch": 0.7758441765785903, "grad_norm": 0.23650068786035083, "learning_rate": 2.521622393482324e-06, "loss": 0.1468, "step": 9719 }, { "epoch": 0.7759240041510338, "grad_norm": 0.2780367865067256, "learning_rate": 2.5199062042015967e-06, "loss": 0.1466, "step": 9720 }, { "epoch": 0.7760038317234773, "grad_norm": 0.2513653333852598, "learning_rate": 2.518190514935541e-06, "loss": 0.1682, "step": 9721 }, { "epoch": 0.7760836592959208, "grad_norm": 0.29851323757791115, "learning_rate": 2.5164753257988407e-06, "loss": 0.1773, "step": 9722 }, { "epoch": 0.7761634868683643, "grad_norm": 0.2767594843454729, "learning_rate": 2.5147606369061495e-06, "loss": 0.1443, "step": 9723 }, { "epoch": 0.7762433144408079, "grad_norm": 0.3240587894138504, "learning_rate": 2.5130464483720853e-06, "loss": 0.1664, "step": 9724 }, { "epoch": 0.7763231420132514, "grad_norm": 0.31831664922658764, "learning_rate": 2.5113327603112383e-06, "loss": 0.2019, "step": 9725 }, { "epoch": 0.7764029695856949, "grad_norm": 0.24913297180368285, "learning_rate": 2.5096195728381644e-06, "loss": 0.1694, "step": 9726 }, { "epoch": 0.7764827971581384, "grad_norm": 0.2537153797660532, "learning_rate": 2.507906886067374e-06, "loss": 0.1254, "step": 9727 }, { "epoch": 0.7765626247305819, "grad_norm": 0.3221240848433286, "learning_rate": 2.5061947001133615e-06, "loss": 0.1623, "step": 9728 }, { "epoch": 0.7766424523030254, "grad_norm": 0.27925366282932546, "learning_rate": 2.504483015090573e-06, "loss": 0.1558, "step": 9729 }, { "epoch": 0.7767222798754689, "grad_norm": 0.33628909362996223, "learning_rate": 2.5027718311134397e-06, "loss": 0.1431, "step": 9730 }, { "epoch": 0.7768021074479126, "grad_norm": 0.24867826586285025, "learning_rate": 2.501061148296332e-06, "loss": 0.1508, "step": 9731 }, { "epoch": 0.7768819350203561, "grad_norm": 0.2869538652457459, "learning_rate": 2.499350966753614e-06, "loss": 0.1287, "step": 9732 }, { "epoch": 0.7769617625927996, "grad_norm": 0.26631788924568156, "learning_rate": 2.497641286599598e-06, "loss": 0.1636, "step": 9733 }, { "epoch": 0.7770415901652431, "grad_norm": 0.2587559370942422, "learning_rate": 2.4959321079485776e-06, "loss": 0.1702, "step": 9734 }, { "epoch": 0.7771214177376866, "grad_norm": 0.28181009079353125, "learning_rate": 2.4942234309147963e-06, "loss": 0.1331, "step": 9735 }, { "epoch": 0.7772012453101301, "grad_norm": 0.25306817882469596, "learning_rate": 2.492515255612472e-06, "loss": 0.1463, "step": 9736 }, { "epoch": 0.7772810728825736, "grad_norm": 0.2755984265549445, "learning_rate": 2.4908075821557953e-06, "loss": 0.172, "step": 9737 }, { "epoch": 0.7773609004550172, "grad_norm": 0.2847041865773372, "learning_rate": 2.4891004106589154e-06, "loss": 0.153, "step": 9738 }, { "epoch": 0.7774407280274607, "grad_norm": 0.2842598869426548, "learning_rate": 2.4873937412359484e-06, "loss": 0.2193, "step": 9739 }, { "epoch": 0.7775205555999042, "grad_norm": 0.28094013770281473, "learning_rate": 2.485687574000977e-06, "loss": 0.1606, "step": 9740 }, { "epoch": 0.7776003831723477, "grad_norm": 0.26043859540204767, "learning_rate": 2.4839819090680563e-06, "loss": 0.1718, "step": 9741 }, { "epoch": 0.7776802107447912, "grad_norm": 0.30441036734817095, "learning_rate": 2.482276746551201e-06, "loss": 0.1621, "step": 9742 }, { "epoch": 0.7777600383172347, "grad_norm": 0.3379967211900392, "learning_rate": 2.4805720865643943e-06, "loss": 0.143, "step": 9743 }, { "epoch": 0.7778398658896784, "grad_norm": 0.29249867561029425, "learning_rate": 2.478867929221587e-06, "loss": 0.1418, "step": 9744 }, { "epoch": 0.7779196934621219, "grad_norm": 0.33728669665003874, "learning_rate": 2.477164274636691e-06, "loss": 0.2035, "step": 9745 }, { "epoch": 0.7779995210345654, "grad_norm": 0.2926660834034404, "learning_rate": 2.4754611229235946e-06, "loss": 0.1547, "step": 9746 }, { "epoch": 0.7780793486070089, "grad_norm": 0.2825642387526325, "learning_rate": 2.473758474196144e-06, "loss": 0.128, "step": 9747 }, { "epoch": 0.7781591761794524, "grad_norm": 0.2916000675963756, "learning_rate": 2.4720563285681567e-06, "loss": 0.1823, "step": 9748 }, { "epoch": 0.7782390037518959, "grad_norm": 0.2830832613680838, "learning_rate": 2.470354686153411e-06, "loss": 0.1328, "step": 9749 }, { "epoch": 0.7783188313243394, "grad_norm": 0.31083259515564127, "learning_rate": 2.4686535470656534e-06, "loss": 0.149, "step": 9750 }, { "epoch": 0.778398658896783, "grad_norm": 0.32475020072542016, "learning_rate": 2.466952911418605e-06, "loss": 0.1707, "step": 9751 }, { "epoch": 0.7784784864692265, "grad_norm": 0.31863002661762363, "learning_rate": 2.4652527793259428e-06, "loss": 0.1369, "step": 9752 }, { "epoch": 0.77855831404167, "grad_norm": 0.2699362409743029, "learning_rate": 2.4635531509013143e-06, "loss": 0.1667, "step": 9753 }, { "epoch": 0.7786381416141135, "grad_norm": 0.3103106326414317, "learning_rate": 2.46185402625833e-06, "loss": 0.1257, "step": 9754 }, { "epoch": 0.778717969186557, "grad_norm": 0.27112869133175177, "learning_rate": 2.460155405510576e-06, "loss": 0.153, "step": 9755 }, { "epoch": 0.7787977967590005, "grad_norm": 0.26556265616432034, "learning_rate": 2.458457288771594e-06, "loss": 0.1704, "step": 9756 }, { "epoch": 0.778877624331444, "grad_norm": 0.2839247197792423, "learning_rate": 2.456759676154897e-06, "loss": 0.1066, "step": 9757 }, { "epoch": 0.7789574519038877, "grad_norm": 0.2763011952116983, "learning_rate": 2.4550625677739635e-06, "loss": 0.1476, "step": 9758 }, { "epoch": 0.7790372794763312, "grad_norm": 0.28438660675670585, "learning_rate": 2.4533659637422367e-06, "loss": 0.1959, "step": 9759 }, { "epoch": 0.7791171070487747, "grad_norm": 0.2803987766314362, "learning_rate": 2.451669864173133e-06, "loss": 0.1533, "step": 9760 }, { "epoch": 0.7791969346212182, "grad_norm": 0.27052574884168584, "learning_rate": 2.4499742691800267e-06, "loss": 0.1784, "step": 9761 }, { "epoch": 0.7792767621936617, "grad_norm": 0.3038510434048296, "learning_rate": 2.4482791788762615e-06, "loss": 0.1564, "step": 9762 }, { "epoch": 0.7793565897661052, "grad_norm": 0.29487275940578406, "learning_rate": 2.4465845933751453e-06, "loss": 0.1835, "step": 9763 }, { "epoch": 0.7794364173385487, "grad_norm": 0.28609623223643077, "learning_rate": 2.4448905127899603e-06, "loss": 0.1184, "step": 9764 }, { "epoch": 0.7795162449109923, "grad_norm": 0.28505703853086334, "learning_rate": 2.443196937233948e-06, "loss": 0.1749, "step": 9765 }, { "epoch": 0.7795960724834358, "grad_norm": 0.2861345394694713, "learning_rate": 2.4415038668203096e-06, "loss": 0.1592, "step": 9766 }, { "epoch": 0.7796759000558793, "grad_norm": 0.34157970551520145, "learning_rate": 2.4398113016622283e-06, "loss": 0.1547, "step": 9767 }, { "epoch": 0.7797557276283228, "grad_norm": 0.2860109848507509, "learning_rate": 2.4381192418728393e-06, "loss": 0.1513, "step": 9768 }, { "epoch": 0.7798355552007663, "grad_norm": 0.2895238410004056, "learning_rate": 2.436427687565256e-06, "loss": 0.1949, "step": 9769 }, { "epoch": 0.7799153827732098, "grad_norm": 0.2823694627341689, "learning_rate": 2.434736638852551e-06, "loss": 0.1223, "step": 9770 }, { "epoch": 0.7799952103456533, "grad_norm": 0.2903628061042808, "learning_rate": 2.433046095847762e-06, "loss": 0.1691, "step": 9771 }, { "epoch": 0.780075037918097, "grad_norm": 0.307266564277192, "learning_rate": 2.4313560586638953e-06, "loss": 0.1638, "step": 9772 }, { "epoch": 0.7801548654905405, "grad_norm": 0.4132942559651844, "learning_rate": 2.429666527413922e-06, "loss": 0.1564, "step": 9773 }, { "epoch": 0.780234693062984, "grad_norm": 0.3136503374301032, "learning_rate": 2.4279775022107865e-06, "loss": 0.1699, "step": 9774 }, { "epoch": 0.7803145206354275, "grad_norm": 0.3010001974889302, "learning_rate": 2.426288983167385e-06, "loss": 0.1744, "step": 9775 }, { "epoch": 0.780394348207871, "grad_norm": 0.27045539665607776, "learning_rate": 2.4246009703965943e-06, "loss": 0.168, "step": 9776 }, { "epoch": 0.7804741757803145, "grad_norm": 0.27060336004942337, "learning_rate": 2.4229134640112484e-06, "loss": 0.1733, "step": 9777 }, { "epoch": 0.780554003352758, "grad_norm": 0.2521376578017827, "learning_rate": 2.421226464124157e-06, "loss": 0.2353, "step": 9778 }, { "epoch": 0.7806338309252016, "grad_norm": 0.30422164536020013, "learning_rate": 2.4195399708480805e-06, "loss": 0.1666, "step": 9779 }, { "epoch": 0.7807136584976451, "grad_norm": 0.3080432530175065, "learning_rate": 2.417853984295755e-06, "loss": 0.1443, "step": 9780 }, { "epoch": 0.7807934860700886, "grad_norm": 0.26772251855481627, "learning_rate": 2.4161685045798876e-06, "loss": 0.1503, "step": 9781 }, { "epoch": 0.7808733136425321, "grad_norm": 0.2684499257183194, "learning_rate": 2.414483531813143e-06, "loss": 0.1765, "step": 9782 }, { "epoch": 0.7809531412149756, "grad_norm": 0.3090826124195299, "learning_rate": 2.412799066108157e-06, "loss": 0.1478, "step": 9783 }, { "epoch": 0.7810329687874191, "grad_norm": 0.30364717933094193, "learning_rate": 2.411115107577523e-06, "loss": 0.1606, "step": 9784 }, { "epoch": 0.7811127963598627, "grad_norm": 0.29603792808374957, "learning_rate": 2.409431656333815e-06, "loss": 0.1354, "step": 9785 }, { "epoch": 0.7811926239323063, "grad_norm": 0.2716448978841759, "learning_rate": 2.4077487124895625e-06, "loss": 0.1674, "step": 9786 }, { "epoch": 0.7812724515047498, "grad_norm": 0.296065668077779, "learning_rate": 2.4060662761572616e-06, "loss": 0.149, "step": 9787 }, { "epoch": 0.7813522790771933, "grad_norm": 0.2721411835061599, "learning_rate": 2.404384347449379e-06, "loss": 0.1374, "step": 9788 }, { "epoch": 0.7814321066496368, "grad_norm": 0.2975234921517706, "learning_rate": 2.4027029264783395e-06, "loss": 0.1856, "step": 9789 }, { "epoch": 0.7815119342220803, "grad_norm": 0.26179504307902335, "learning_rate": 2.4010220133565476e-06, "loss": 0.1575, "step": 9790 }, { "epoch": 0.7815917617945238, "grad_norm": 0.26210220985789673, "learning_rate": 2.3993416081963615e-06, "loss": 0.1397, "step": 9791 }, { "epoch": 0.7816715893669673, "grad_norm": 0.2777445115278698, "learning_rate": 2.3976617111101097e-06, "loss": 0.2164, "step": 9792 }, { "epoch": 0.7817514169394109, "grad_norm": 0.31074462558602645, "learning_rate": 2.3959823222100862e-06, "loss": 0.1396, "step": 9793 }, { "epoch": 0.7818312445118544, "grad_norm": 0.30315681747712914, "learning_rate": 2.3943034416085497e-06, "loss": 0.1302, "step": 9794 }, { "epoch": 0.7819110720842979, "grad_norm": 0.31252408435349777, "learning_rate": 2.3926250694177333e-06, "loss": 0.1474, "step": 9795 }, { "epoch": 0.7819908996567414, "grad_norm": 0.3166910511251044, "learning_rate": 2.390947205749824e-06, "loss": 0.1419, "step": 9796 }, { "epoch": 0.7820707272291849, "grad_norm": 0.2794345961530596, "learning_rate": 2.389269850716982e-06, "loss": 0.1484, "step": 9797 }, { "epoch": 0.7821505548016284, "grad_norm": 0.27455385689762957, "learning_rate": 2.387593004431329e-06, "loss": 0.1126, "step": 9798 }, { "epoch": 0.782230382374072, "grad_norm": 0.3007142643245617, "learning_rate": 2.3859166670049605e-06, "loss": 0.1743, "step": 9799 }, { "epoch": 0.7823102099465156, "grad_norm": 0.2528046802083788, "learning_rate": 2.3842408385499316e-06, "loss": 0.1264, "step": 9800 }, { "epoch": 0.7823900375189591, "grad_norm": 0.2727447243745223, "learning_rate": 2.3825655191782626e-06, "loss": 0.135, "step": 9801 }, { "epoch": 0.7824698650914026, "grad_norm": 0.2830893101733161, "learning_rate": 2.380890709001944e-06, "loss": 0.1768, "step": 9802 }, { "epoch": 0.7825496926638461, "grad_norm": 0.2941520062513831, "learning_rate": 2.3792164081329272e-06, "loss": 0.1926, "step": 9803 }, { "epoch": 0.7826295202362896, "grad_norm": 0.30180980941634517, "learning_rate": 2.3775426166831373e-06, "loss": 0.1448, "step": 9804 }, { "epoch": 0.7827093478087331, "grad_norm": 0.32199865446738873, "learning_rate": 2.3758693347644577e-06, "loss": 0.127, "step": 9805 }, { "epoch": 0.7827891753811766, "grad_norm": 0.2985268067249845, "learning_rate": 2.374196562488741e-06, "loss": 0.1536, "step": 9806 }, { "epoch": 0.7828690029536202, "grad_norm": 0.2791530559991905, "learning_rate": 2.3725242999678032e-06, "loss": 0.1159, "step": 9807 }, { "epoch": 0.7829488305260637, "grad_norm": 0.2686153813613097, "learning_rate": 2.3708525473134335e-06, "loss": 0.1624, "step": 9808 }, { "epoch": 0.7830286580985072, "grad_norm": 0.264677970169817, "learning_rate": 2.369181304637382e-06, "loss": 0.1398, "step": 9809 }, { "epoch": 0.7831084856709507, "grad_norm": 0.28468171154262006, "learning_rate": 2.367510572051357e-06, "loss": 0.1682, "step": 9810 }, { "epoch": 0.7831883132433942, "grad_norm": 0.31155096955322453, "learning_rate": 2.3658403496670478e-06, "loss": 0.2095, "step": 9811 }, { "epoch": 0.7832681408158378, "grad_norm": 0.30028764831836763, "learning_rate": 2.364170637596096e-06, "loss": 0.2062, "step": 9812 }, { "epoch": 0.7833479683882814, "grad_norm": 0.2653064785522289, "learning_rate": 2.362501435950123e-06, "loss": 0.1554, "step": 9813 }, { "epoch": 0.7834277959607249, "grad_norm": 0.2314140908402426, "learning_rate": 2.360832744840703e-06, "loss": 0.1488, "step": 9814 }, { "epoch": 0.7835076235331684, "grad_norm": 0.2771099002142212, "learning_rate": 2.3591645643793835e-06, "loss": 0.1465, "step": 9815 }, { "epoch": 0.7835874511056119, "grad_norm": 0.30614014389770433, "learning_rate": 2.3574968946776745e-06, "loss": 0.1604, "step": 9816 }, { "epoch": 0.7836672786780554, "grad_norm": 0.321460994684956, "learning_rate": 2.3558297358470507e-06, "loss": 0.1486, "step": 9817 }, { "epoch": 0.7837471062504989, "grad_norm": 0.27274459943676893, "learning_rate": 2.354163087998963e-06, "loss": 0.1568, "step": 9818 }, { "epoch": 0.7838269338229424, "grad_norm": 0.2690603170926502, "learning_rate": 2.3524969512448094e-06, "loss": 0.1214, "step": 9819 }, { "epoch": 0.783906761395386, "grad_norm": 0.29981231406484, "learning_rate": 2.3508313256959735e-06, "loss": 0.1904, "step": 9820 }, { "epoch": 0.7839865889678295, "grad_norm": 0.3270501084886773, "learning_rate": 2.34916621146379e-06, "loss": 0.1444, "step": 9821 }, { "epoch": 0.784066416540273, "grad_norm": 0.305024154368389, "learning_rate": 2.3475016086595726e-06, "loss": 0.1659, "step": 9822 }, { "epoch": 0.7841462441127165, "grad_norm": 0.29573925801882844, "learning_rate": 2.3458375173945857e-06, "loss": 0.1316, "step": 9823 }, { "epoch": 0.78422607168516, "grad_norm": 0.3299303316702662, "learning_rate": 2.3441739377800675e-06, "loss": 0.2003, "step": 9824 }, { "epoch": 0.7843058992576035, "grad_norm": 0.3038718038616992, "learning_rate": 2.3425108699272283e-06, "loss": 0.1514, "step": 9825 }, { "epoch": 0.7843857268300471, "grad_norm": 0.32094052331899886, "learning_rate": 2.3408483139472314e-06, "loss": 0.1694, "step": 9826 }, { "epoch": 0.7844655544024907, "grad_norm": 0.30633592598462356, "learning_rate": 2.3391862699512147e-06, "loss": 0.199, "step": 9827 }, { "epoch": 0.7845453819749342, "grad_norm": 0.2398614716982674, "learning_rate": 2.3375247380502763e-06, "loss": 0.1427, "step": 9828 }, { "epoch": 0.7846252095473777, "grad_norm": 0.34188351320471305, "learning_rate": 2.335863718355489e-06, "loss": 0.1318, "step": 9829 }, { "epoch": 0.7847050371198212, "grad_norm": 0.2695376793990458, "learning_rate": 2.3342032109778814e-06, "loss": 0.1245, "step": 9830 }, { "epoch": 0.7847848646922647, "grad_norm": 0.3000462481751833, "learning_rate": 2.3325432160284524e-06, "loss": 0.2053, "step": 9831 }, { "epoch": 0.7848646922647082, "grad_norm": 0.29246674752242746, "learning_rate": 2.3308837336181657e-06, "loss": 0.1693, "step": 9832 }, { "epoch": 0.7849445198371517, "grad_norm": 0.3821073139837374, "learning_rate": 2.329224763857949e-06, "loss": 0.1719, "step": 9833 }, { "epoch": 0.7850243474095953, "grad_norm": 0.29948406514349707, "learning_rate": 2.327566306858703e-06, "loss": 0.1305, "step": 9834 }, { "epoch": 0.7851041749820388, "grad_norm": 0.30528310678372284, "learning_rate": 2.325908362731286e-06, "loss": 0.1882, "step": 9835 }, { "epoch": 0.7851840025544823, "grad_norm": 0.28375509658122144, "learning_rate": 2.3242509315865258e-06, "loss": 0.1763, "step": 9836 }, { "epoch": 0.7852638301269258, "grad_norm": 0.33376433001093836, "learning_rate": 2.322594013535211e-06, "loss": 0.1004, "step": 9837 }, { "epoch": 0.7853436576993693, "grad_norm": 0.2913219864007139, "learning_rate": 2.3209376086881073e-06, "loss": 0.1861, "step": 9838 }, { "epoch": 0.7854234852718129, "grad_norm": 0.30312134941246305, "learning_rate": 2.3192817171559346e-06, "loss": 0.1718, "step": 9839 }, { "epoch": 0.7855033128442565, "grad_norm": 0.2816337715297175, "learning_rate": 2.3176263390493837e-06, "loss": 0.1231, "step": 9840 }, { "epoch": 0.7855831404167, "grad_norm": 0.27520914314172773, "learning_rate": 2.3159714744791094e-06, "loss": 0.2204, "step": 9841 }, { "epoch": 0.7856629679891435, "grad_norm": 0.3324326756245076, "learning_rate": 2.3143171235557315e-06, "loss": 0.175, "step": 9842 }, { "epoch": 0.785742795561587, "grad_norm": 0.28223706029637086, "learning_rate": 2.31266328638984e-06, "loss": 0.1478, "step": 9843 }, { "epoch": 0.7858226231340305, "grad_norm": 0.2689820372966089, "learning_rate": 2.311009963091986e-06, "loss": 0.1875, "step": 9844 }, { "epoch": 0.785902450706474, "grad_norm": 0.2738808110276403, "learning_rate": 2.3093571537726865e-06, "loss": 0.1641, "step": 9845 }, { "epoch": 0.7859822782789175, "grad_norm": 0.2627418244626938, "learning_rate": 2.3077048585424276e-06, "loss": 0.1277, "step": 9846 }, { "epoch": 0.786062105851361, "grad_norm": 0.26587767391524697, "learning_rate": 2.3060530775116542e-06, "loss": 0.1329, "step": 9847 }, { "epoch": 0.7861419334238046, "grad_norm": 0.3302341170078929, "learning_rate": 2.304401810790786e-06, "loss": 0.1639, "step": 9848 }, { "epoch": 0.7862217609962481, "grad_norm": 0.2673412184447852, "learning_rate": 2.3027510584902037e-06, "loss": 0.1889, "step": 9849 }, { "epoch": 0.7863015885686916, "grad_norm": 0.2661943851759921, "learning_rate": 2.30110082072025e-06, "loss": 0.1496, "step": 9850 }, { "epoch": 0.7863814161411351, "grad_norm": 0.2844418883557685, "learning_rate": 2.2994510975912376e-06, "loss": 0.1297, "step": 9851 }, { "epoch": 0.7864612437135786, "grad_norm": 0.2885062009362007, "learning_rate": 2.297801889213447e-06, "loss": 0.1932, "step": 9852 }, { "epoch": 0.7865410712860222, "grad_norm": 0.31080292292403794, "learning_rate": 2.2961531956971215e-06, "loss": 0.1745, "step": 9853 }, { "epoch": 0.7866208988584658, "grad_norm": 0.26309216038022015, "learning_rate": 2.294505017152463e-06, "loss": 0.1432, "step": 9854 }, { "epoch": 0.7867007264309093, "grad_norm": 0.27596356069377376, "learning_rate": 2.2928573536896524e-06, "loss": 0.1465, "step": 9855 }, { "epoch": 0.7867805540033528, "grad_norm": 0.29958536580610673, "learning_rate": 2.2912102054188246e-06, "loss": 0.1608, "step": 9856 }, { "epoch": 0.7868603815757963, "grad_norm": 0.3069281467499377, "learning_rate": 2.289563572450093e-06, "loss": 0.1185, "step": 9857 }, { "epoch": 0.7869402091482398, "grad_norm": 0.26455568783098116, "learning_rate": 2.287917454893518e-06, "loss": 0.1687, "step": 9858 }, { "epoch": 0.7870200367206833, "grad_norm": 0.31383714122553363, "learning_rate": 2.2862718528591434e-06, "loss": 0.1623, "step": 9859 }, { "epoch": 0.7870998642931268, "grad_norm": 0.3059833368183358, "learning_rate": 2.2846267664569688e-06, "loss": 0.2207, "step": 9860 }, { "epoch": 0.7871796918655704, "grad_norm": 0.30940872844705813, "learning_rate": 2.28298219579696e-06, "loss": 0.2205, "step": 9861 }, { "epoch": 0.7872595194380139, "grad_norm": 0.2793714681541268, "learning_rate": 2.2813381409890566e-06, "loss": 0.1675, "step": 9862 }, { "epoch": 0.7873393470104574, "grad_norm": 0.291609567375928, "learning_rate": 2.279694602143148e-06, "loss": 0.1847, "step": 9863 }, { "epoch": 0.7874191745829009, "grad_norm": 0.3245941184060426, "learning_rate": 2.278051579369105e-06, "loss": 0.1951, "step": 9864 }, { "epoch": 0.7874990021553444, "grad_norm": 0.28105043397353047, "learning_rate": 2.276409072776752e-06, "loss": 0.1623, "step": 9865 }, { "epoch": 0.787578829727788, "grad_norm": 0.27541091645698923, "learning_rate": 2.274767082475893e-06, "loss": 0.1424, "step": 9866 }, { "epoch": 0.7876586573002315, "grad_norm": 0.29894644368190404, "learning_rate": 2.2731256085762755e-06, "loss": 0.146, "step": 9867 }, { "epoch": 0.7877384848726751, "grad_norm": 0.24374446991989138, "learning_rate": 2.2714846511876365e-06, "loss": 0.1776, "step": 9868 }, { "epoch": 0.7878183124451186, "grad_norm": 0.2976855260317679, "learning_rate": 2.2698442104196637e-06, "loss": 0.1743, "step": 9869 }, { "epoch": 0.7878981400175621, "grad_norm": 0.3084426825410774, "learning_rate": 2.268204286382013e-06, "loss": 0.1504, "step": 9870 }, { "epoch": 0.7879779675900056, "grad_norm": 0.29995155561342995, "learning_rate": 2.2665648791843087e-06, "loss": 0.1658, "step": 9871 }, { "epoch": 0.7880577951624491, "grad_norm": 0.33432990199758084, "learning_rate": 2.2649259889361353e-06, "loss": 0.1554, "step": 9872 }, { "epoch": 0.7881376227348926, "grad_norm": 0.2663550531097572, "learning_rate": 2.2632876157470505e-06, "loss": 0.1429, "step": 9873 }, { "epoch": 0.7882174503073361, "grad_norm": 0.2734903955915021, "learning_rate": 2.2616497597265707e-06, "loss": 0.1521, "step": 9874 }, { "epoch": 0.7882972778797797, "grad_norm": 0.2875425912520218, "learning_rate": 2.2600124209841813e-06, "loss": 0.1412, "step": 9875 }, { "epoch": 0.7883771054522232, "grad_norm": 0.2936012439548459, "learning_rate": 2.258375599629331e-06, "loss": 0.1158, "step": 9876 }, { "epoch": 0.7884569330246667, "grad_norm": 0.26965544424742904, "learning_rate": 2.256739295771433e-06, "loss": 0.1452, "step": 9877 }, { "epoch": 0.7885367605971102, "grad_norm": 0.3294496025048484, "learning_rate": 2.2551035095198713e-06, "loss": 0.1551, "step": 9878 }, { "epoch": 0.7886165881695537, "grad_norm": 0.3089498907769475, "learning_rate": 2.2534682409839915e-06, "loss": 0.1768, "step": 9879 }, { "epoch": 0.7886964157419973, "grad_norm": 0.25938008964559134, "learning_rate": 2.251833490273102e-06, "loss": 0.1844, "step": 9880 }, { "epoch": 0.7887762433144409, "grad_norm": 0.2707570568811615, "learning_rate": 2.2501992574964793e-06, "loss": 0.1604, "step": 9881 }, { "epoch": 0.7888560708868844, "grad_norm": 0.32426104734072414, "learning_rate": 2.248565542763369e-06, "loss": 0.1985, "step": 9882 }, { "epoch": 0.7889358984593279, "grad_norm": 0.2805022234044252, "learning_rate": 2.2469323461829763e-06, "loss": 0.1239, "step": 9883 }, { "epoch": 0.7890157260317714, "grad_norm": 0.27351951212238457, "learning_rate": 2.2452996678644747e-06, "loss": 0.1475, "step": 9884 }, { "epoch": 0.7890955536042149, "grad_norm": 0.25323783554160834, "learning_rate": 2.243667507917001e-06, "loss": 0.1661, "step": 9885 }, { "epoch": 0.7891753811766584, "grad_norm": 0.2742157857631697, "learning_rate": 2.2420358664496578e-06, "loss": 0.1538, "step": 9886 }, { "epoch": 0.7892552087491019, "grad_norm": 0.28248075458688654, "learning_rate": 2.240404743571517e-06, "loss": 0.1155, "step": 9887 }, { "epoch": 0.7893350363215454, "grad_norm": 0.3221473589306567, "learning_rate": 2.2387741393916117e-06, "loss": 0.1684, "step": 9888 }, { "epoch": 0.789414863893989, "grad_norm": 0.29309090487119543, "learning_rate": 2.2371440540189406e-06, "loss": 0.1367, "step": 9889 }, { "epoch": 0.7894946914664325, "grad_norm": 0.29633065369201395, "learning_rate": 2.235514487562469e-06, "loss": 0.1469, "step": 9890 }, { "epoch": 0.789574519038876, "grad_norm": 0.32116577544814234, "learning_rate": 2.233885440131123e-06, "loss": 0.1292, "step": 9891 }, { "epoch": 0.7896543466113195, "grad_norm": 0.2978732603079887, "learning_rate": 2.2322569118338043e-06, "loss": 0.1581, "step": 9892 }, { "epoch": 0.7897341741837631, "grad_norm": 0.3018987499590215, "learning_rate": 2.2306289027793714e-06, "loss": 0.1714, "step": 9893 }, { "epoch": 0.7898140017562066, "grad_norm": 0.2628389866498121, "learning_rate": 2.229001413076649e-06, "loss": 0.1741, "step": 9894 }, { "epoch": 0.7898938293286502, "grad_norm": 0.2662617853548969, "learning_rate": 2.227374442834427e-06, "loss": 0.1481, "step": 9895 }, { "epoch": 0.7899736569010937, "grad_norm": 0.24902756656733055, "learning_rate": 2.2257479921614667e-06, "loss": 0.1458, "step": 9896 }, { "epoch": 0.7900534844735372, "grad_norm": 0.292076395151457, "learning_rate": 2.224122061166487e-06, "loss": 0.187, "step": 9897 }, { "epoch": 0.7901333120459807, "grad_norm": 0.2804709419646739, "learning_rate": 2.2224966499581746e-06, "loss": 0.1048, "step": 9898 }, { "epoch": 0.7902131396184242, "grad_norm": 0.2653418084006282, "learning_rate": 2.220871758645182e-06, "loss": 0.1256, "step": 9899 }, { "epoch": 0.7902929671908677, "grad_norm": 0.25524214604884043, "learning_rate": 2.2192473873361255e-06, "loss": 0.1615, "step": 9900 }, { "epoch": 0.7903727947633112, "grad_norm": 0.28428129266647245, "learning_rate": 2.2176235361395936e-06, "loss": 0.1399, "step": 9901 }, { "epoch": 0.7904526223357548, "grad_norm": 0.2750380995010432, "learning_rate": 2.216000205164126e-06, "loss": 0.1436, "step": 9902 }, { "epoch": 0.7905324499081983, "grad_norm": 0.2640023495207685, "learning_rate": 2.214377394518242e-06, "loss": 0.1634, "step": 9903 }, { "epoch": 0.7906122774806418, "grad_norm": 0.3321218456568311, "learning_rate": 2.2127551043104155e-06, "loss": 0.145, "step": 9904 }, { "epoch": 0.7906921050530853, "grad_norm": 0.2900109897766476, "learning_rate": 2.211133334649096e-06, "loss": 0.1416, "step": 9905 }, { "epoch": 0.7907719326255288, "grad_norm": 0.3567258383559241, "learning_rate": 2.209512085642691e-06, "loss": 0.1814, "step": 9906 }, { "epoch": 0.7908517601979724, "grad_norm": 0.3084095031891599, "learning_rate": 2.2078913573995687e-06, "loss": 0.1407, "step": 9907 }, { "epoch": 0.790931587770416, "grad_norm": 0.2974177756728792, "learning_rate": 2.206271150028074e-06, "loss": 0.1852, "step": 9908 }, { "epoch": 0.7910114153428595, "grad_norm": 0.2756170909632074, "learning_rate": 2.204651463636508e-06, "loss": 0.1485, "step": 9909 }, { "epoch": 0.791091242915303, "grad_norm": 0.3062645969118243, "learning_rate": 2.2030322983331478e-06, "loss": 0.1652, "step": 9910 }, { "epoch": 0.7911710704877465, "grad_norm": 0.29525083444004924, "learning_rate": 2.2014136542262166e-06, "loss": 0.1907, "step": 9911 }, { "epoch": 0.79125089806019, "grad_norm": 0.3283669577445802, "learning_rate": 2.1997955314239227e-06, "loss": 0.1311, "step": 9912 }, { "epoch": 0.7913307256326335, "grad_norm": 0.2894518717458113, "learning_rate": 2.1981779300344295e-06, "loss": 0.1716, "step": 9913 }, { "epoch": 0.791410553205077, "grad_norm": 0.28189187111561703, "learning_rate": 2.1965608501658663e-06, "loss": 0.1138, "step": 9914 }, { "epoch": 0.7914903807775205, "grad_norm": 0.2838829827989187, "learning_rate": 2.1949442919263286e-06, "loss": 0.1637, "step": 9915 }, { "epoch": 0.791570208349964, "grad_norm": 0.3136394132264246, "learning_rate": 2.1933282554238743e-06, "loss": 0.1671, "step": 9916 }, { "epoch": 0.7916500359224076, "grad_norm": 0.2951189172797966, "learning_rate": 2.191712740766535e-06, "loss": 0.1216, "step": 9917 }, { "epoch": 0.7917298634948511, "grad_norm": 0.2690464572540947, "learning_rate": 2.1900977480622975e-06, "loss": 0.1697, "step": 9918 }, { "epoch": 0.7918096910672946, "grad_norm": 0.30770585531259814, "learning_rate": 2.1884832774191188e-06, "loss": 0.1679, "step": 9919 }, { "epoch": 0.7918895186397381, "grad_norm": 0.271153644067099, "learning_rate": 2.1868693289449206e-06, "loss": 0.209, "step": 9920 }, { "epoch": 0.7919693462121817, "grad_norm": 0.273539783198909, "learning_rate": 2.185255902747584e-06, "loss": 0.1514, "step": 9921 }, { "epoch": 0.7920491737846252, "grad_norm": 0.26715811496323993, "learning_rate": 2.1836429989349673e-06, "loss": 0.162, "step": 9922 }, { "epoch": 0.7921290013570688, "grad_norm": 0.32364576259983785, "learning_rate": 2.1820306176148833e-06, "loss": 0.1626, "step": 9923 }, { "epoch": 0.7922088289295123, "grad_norm": 0.32311233538932677, "learning_rate": 2.180418758895113e-06, "loss": 0.174, "step": 9924 }, { "epoch": 0.7922886565019558, "grad_norm": 0.29265616688955665, "learning_rate": 2.178807422883401e-06, "loss": 0.1678, "step": 9925 }, { "epoch": 0.7923684840743993, "grad_norm": 0.33705420586736545, "learning_rate": 2.177196609687464e-06, "loss": 0.1556, "step": 9926 }, { "epoch": 0.7924483116468428, "grad_norm": 0.3212944701255467, "learning_rate": 2.175586319414975e-06, "loss": 0.1171, "step": 9927 }, { "epoch": 0.7925281392192863, "grad_norm": 0.2883472165113368, "learning_rate": 2.173976552173577e-06, "loss": 0.1836, "step": 9928 }, { "epoch": 0.7926079667917298, "grad_norm": 0.2708382935545148, "learning_rate": 2.172367308070874e-06, "loss": 0.1594, "step": 9929 }, { "epoch": 0.7926877943641734, "grad_norm": 0.3179253770620909, "learning_rate": 2.1707585872144377e-06, "loss": 0.1995, "step": 9930 }, { "epoch": 0.7927676219366169, "grad_norm": 0.27803885076821955, "learning_rate": 2.1691503897118095e-06, "loss": 0.1601, "step": 9931 }, { "epoch": 0.7928474495090604, "grad_norm": 0.2971084974785205, "learning_rate": 2.1675427156704863e-06, "loss": 0.1523, "step": 9932 }, { "epoch": 0.7929272770815039, "grad_norm": 0.2631934010338136, "learning_rate": 2.1659355651979377e-06, "loss": 0.1434, "step": 9933 }, { "epoch": 0.7930071046539475, "grad_norm": 0.31128032280779244, "learning_rate": 2.1643289384015897e-06, "loss": 0.1775, "step": 9934 }, { "epoch": 0.793086932226391, "grad_norm": 0.24983997324538218, "learning_rate": 2.162722835388846e-06, "loss": 0.1466, "step": 9935 }, { "epoch": 0.7931667597988346, "grad_norm": 0.29356982667565695, "learning_rate": 2.1611172562670655e-06, "loss": 0.1763, "step": 9936 }, { "epoch": 0.7932465873712781, "grad_norm": 0.27069787202765355, "learning_rate": 2.1595122011435754e-06, "loss": 0.1811, "step": 9937 }, { "epoch": 0.7933264149437216, "grad_norm": 0.30813129405767203, "learning_rate": 2.1579076701256664e-06, "loss": 0.1966, "step": 9938 }, { "epoch": 0.7934062425161651, "grad_norm": 0.28074418701872766, "learning_rate": 2.1563036633205912e-06, "loss": 0.1491, "step": 9939 }, { "epoch": 0.7934860700886086, "grad_norm": 0.3446978928764472, "learning_rate": 2.1547001808355795e-06, "loss": 0.1563, "step": 9940 }, { "epoch": 0.7935658976610521, "grad_norm": 0.27656292244341285, "learning_rate": 2.1530972227778135e-06, "loss": 0.1854, "step": 9941 }, { "epoch": 0.7936457252334956, "grad_norm": 0.2550363535168454, "learning_rate": 2.1514947892544437e-06, "loss": 0.1902, "step": 9942 }, { "epoch": 0.7937255528059391, "grad_norm": 0.28993272151774696, "learning_rate": 2.149892880372588e-06, "loss": 0.1818, "step": 9943 }, { "epoch": 0.7938053803783827, "grad_norm": 0.27887159832248165, "learning_rate": 2.1482914962393244e-06, "loss": 0.1852, "step": 9944 }, { "epoch": 0.7938852079508262, "grad_norm": 0.3420576513045412, "learning_rate": 2.146690636961707e-06, "loss": 0.1643, "step": 9945 }, { "epoch": 0.7939650355232697, "grad_norm": 0.27793121327280157, "learning_rate": 2.1450903026467353e-06, "loss": 0.1939, "step": 9946 }, { "epoch": 0.7940448630957132, "grad_norm": 0.31498454132267295, "learning_rate": 2.1434904934013954e-06, "loss": 0.1323, "step": 9947 }, { "epoch": 0.7941246906681568, "grad_norm": 0.2947232034504037, "learning_rate": 2.1418912093326218e-06, "loss": 0.1502, "step": 9948 }, { "epoch": 0.7942045182406003, "grad_norm": 0.26189052801630425, "learning_rate": 2.140292450547329e-06, "loss": 0.1715, "step": 9949 }, { "epoch": 0.7942843458130439, "grad_norm": 0.2673973470989968, "learning_rate": 2.138694217152377e-06, "loss": 0.1662, "step": 9950 }, { "epoch": 0.7943641733854874, "grad_norm": 0.29422943025639536, "learning_rate": 2.1370965092546047e-06, "loss": 0.1759, "step": 9951 }, { "epoch": 0.7944440009579309, "grad_norm": 0.2896664328828368, "learning_rate": 2.135499326960817e-06, "loss": 0.1513, "step": 9952 }, { "epoch": 0.7945238285303744, "grad_norm": 0.3139562860625185, "learning_rate": 2.133902670377773e-06, "loss": 0.1579, "step": 9953 }, { "epoch": 0.7946036561028179, "grad_norm": 0.24784053007106735, "learning_rate": 2.1323065396122113e-06, "loss": 0.1563, "step": 9954 }, { "epoch": 0.7946834836752614, "grad_norm": 0.2670387781699933, "learning_rate": 2.1307109347708168e-06, "loss": 0.1649, "step": 9955 }, { "epoch": 0.7947633112477049, "grad_norm": 0.23867057033864547, "learning_rate": 2.1291158559602564e-06, "loss": 0.1326, "step": 9956 }, { "epoch": 0.7948431388201485, "grad_norm": 0.2505728475811062, "learning_rate": 2.127521303287152e-06, "loss": 0.1329, "step": 9957 }, { "epoch": 0.794922966392592, "grad_norm": 0.2982284959498755, "learning_rate": 2.1259272768580953e-06, "loss": 0.1542, "step": 9958 }, { "epoch": 0.7950027939650355, "grad_norm": 0.29919979356381454, "learning_rate": 2.1243337767796378e-06, "loss": 0.1384, "step": 9959 }, { "epoch": 0.795082621537479, "grad_norm": 0.3175752283205654, "learning_rate": 2.1227408031582972e-06, "loss": 0.153, "step": 9960 }, { "epoch": 0.7951624491099226, "grad_norm": 0.26992825836954154, "learning_rate": 2.121148356100564e-06, "loss": 0.1542, "step": 9961 }, { "epoch": 0.7952422766823661, "grad_norm": 0.2737268608985782, "learning_rate": 2.119556435712883e-06, "loss": 0.1723, "step": 9962 }, { "epoch": 0.7953221042548096, "grad_norm": 0.28739153329423467, "learning_rate": 2.117965042101667e-06, "loss": 0.1931, "step": 9963 }, { "epoch": 0.7954019318272532, "grad_norm": 0.2611116688137865, "learning_rate": 2.1163741753732936e-06, "loss": 0.1236, "step": 9964 }, { "epoch": 0.7954817593996967, "grad_norm": 0.2880856902779312, "learning_rate": 2.114783835634111e-06, "loss": 0.1752, "step": 9965 }, { "epoch": 0.7955615869721402, "grad_norm": 0.27645500697807135, "learning_rate": 2.1131940229904226e-06, "loss": 0.1592, "step": 9966 }, { "epoch": 0.7956414145445837, "grad_norm": 0.31972106327745836, "learning_rate": 2.1116047375485025e-06, "loss": 0.166, "step": 9967 }, { "epoch": 0.7957212421170272, "grad_norm": 0.27711681786302855, "learning_rate": 2.1100159794145893e-06, "loss": 0.1579, "step": 9968 }, { "epoch": 0.7958010696894707, "grad_norm": 0.2854469298727938, "learning_rate": 2.1084277486948803e-06, "loss": 0.172, "step": 9969 }, { "epoch": 0.7958808972619142, "grad_norm": 0.2633461306374494, "learning_rate": 2.1068400454955484e-06, "loss": 0.1477, "step": 9970 }, { "epoch": 0.7959607248343578, "grad_norm": 0.2761718160602432, "learning_rate": 2.1052528699227226e-06, "loss": 0.1241, "step": 9971 }, { "epoch": 0.7960405524068013, "grad_norm": 0.35866946243235515, "learning_rate": 2.103666222082501e-06, "loss": 0.1492, "step": 9972 }, { "epoch": 0.7961203799792448, "grad_norm": 0.2611830769812276, "learning_rate": 2.1020801020809422e-06, "loss": 0.1216, "step": 9973 }, { "epoch": 0.7962002075516883, "grad_norm": 0.297192144053422, "learning_rate": 2.1004945100240704e-06, "loss": 0.1895, "step": 9974 }, { "epoch": 0.7962800351241319, "grad_norm": 0.27741035382313467, "learning_rate": 2.0989094460178827e-06, "loss": 0.1798, "step": 9975 }, { "epoch": 0.7963598626965754, "grad_norm": 0.26566616300555906, "learning_rate": 2.097324910168329e-06, "loss": 0.1583, "step": 9976 }, { "epoch": 0.796439690269019, "grad_norm": 0.2679666204081448, "learning_rate": 2.0957409025813323e-06, "loss": 0.1926, "step": 9977 }, { "epoch": 0.7965195178414625, "grad_norm": 0.27345795272896484, "learning_rate": 2.094157423362773e-06, "loss": 0.1296, "step": 9978 }, { "epoch": 0.796599345413906, "grad_norm": 0.270395043668827, "learning_rate": 2.092574472618505e-06, "loss": 0.132, "step": 9979 }, { "epoch": 0.7966791729863495, "grad_norm": 0.2888038606079886, "learning_rate": 2.09099205045434e-06, "loss": 0.1465, "step": 9980 }, { "epoch": 0.796759000558793, "grad_norm": 0.25417357084924386, "learning_rate": 2.0894101569760584e-06, "loss": 0.1563, "step": 9981 }, { "epoch": 0.7968388281312365, "grad_norm": 0.3193865568282936, "learning_rate": 2.0878287922894023e-06, "loss": 0.1665, "step": 9982 }, { "epoch": 0.79691865570368, "grad_norm": 0.275560078438354, "learning_rate": 2.086247956500076e-06, "loss": 0.1612, "step": 9983 }, { "epoch": 0.7969984832761235, "grad_norm": 0.288259379918479, "learning_rate": 2.0846676497137595e-06, "loss": 0.1375, "step": 9984 }, { "epoch": 0.7970783108485671, "grad_norm": 0.294551918716233, "learning_rate": 2.0830878720360858e-06, "loss": 0.162, "step": 9985 }, { "epoch": 0.7971581384210106, "grad_norm": 0.27893709927703314, "learning_rate": 2.081508623572657e-06, "loss": 0.138, "step": 9986 }, { "epoch": 0.7972379659934541, "grad_norm": 0.25763220444595997, "learning_rate": 2.07992990442904e-06, "loss": 0.1836, "step": 9987 }, { "epoch": 0.7973177935658977, "grad_norm": 0.28695747920823816, "learning_rate": 2.0783517147107634e-06, "loss": 0.2024, "step": 9988 }, { "epoch": 0.7973976211383412, "grad_norm": 0.3251509699213132, "learning_rate": 2.0767740545233307e-06, "loss": 0.1357, "step": 9989 }, { "epoch": 0.7974774487107847, "grad_norm": 0.28315873379345624, "learning_rate": 2.0751969239721914e-06, "loss": 0.1434, "step": 9990 }, { "epoch": 0.7975572762832283, "grad_norm": 0.33650230935204994, "learning_rate": 2.073620323162778e-06, "loss": 0.1516, "step": 9991 }, { "epoch": 0.7976371038556718, "grad_norm": 0.33818476426578525, "learning_rate": 2.072044252200477e-06, "loss": 0.1383, "step": 9992 }, { "epoch": 0.7977169314281153, "grad_norm": 0.29501527746777256, "learning_rate": 2.0704687111906472e-06, "loss": 0.1889, "step": 9993 }, { "epoch": 0.7977967590005588, "grad_norm": 0.24027929867887018, "learning_rate": 2.068893700238601e-06, "loss": 0.1559, "step": 9994 }, { "epoch": 0.7978765865730023, "grad_norm": 0.2981950303287325, "learning_rate": 2.067319219449623e-06, "loss": 0.1731, "step": 9995 }, { "epoch": 0.7979564141454458, "grad_norm": 0.2678579844428131, "learning_rate": 2.0657452689289638e-06, "loss": 0.1779, "step": 9996 }, { "epoch": 0.7980362417178893, "grad_norm": 0.29800998531173467, "learning_rate": 2.064171848781834e-06, "loss": 0.1407, "step": 9997 }, { "epoch": 0.7981160692903329, "grad_norm": 0.27901559485928984, "learning_rate": 2.062598959113411e-06, "loss": 0.1735, "step": 9998 }, { "epoch": 0.7981958968627764, "grad_norm": 0.2857160485110969, "learning_rate": 2.0610266000288328e-06, "loss": 0.1986, "step": 9999 }, { "epoch": 0.7982757244352199, "grad_norm": 0.3135556222695118, "learning_rate": 2.059454771633211e-06, "loss": 0.153, "step": 10000 }, { "epoch": 0.7983555520076634, "grad_norm": 0.24819576965374177, "learning_rate": 2.0578834740316123e-06, "loss": 0.1451, "step": 10001 }, { "epoch": 0.798435379580107, "grad_norm": 0.28413518204384586, "learning_rate": 2.056312707329078e-06, "loss": 0.1673, "step": 10002 }, { "epoch": 0.7985152071525505, "grad_norm": 0.2652895686373487, "learning_rate": 2.0547424716305995e-06, "loss": 0.1755, "step": 10003 }, { "epoch": 0.798595034724994, "grad_norm": 0.2711013351951616, "learning_rate": 2.0531727670411418e-06, "loss": 0.1482, "step": 10004 }, { "epoch": 0.7986748622974376, "grad_norm": 0.2543722970075734, "learning_rate": 2.0516035936656377e-06, "loss": 0.156, "step": 10005 }, { "epoch": 0.7987546898698811, "grad_norm": 0.3111794969594107, "learning_rate": 2.050034951608978e-06, "loss": 0.1355, "step": 10006 }, { "epoch": 0.7988345174423246, "grad_norm": 0.2932812798546623, "learning_rate": 2.048466840976021e-06, "loss": 0.1621, "step": 10007 }, { "epoch": 0.7989143450147681, "grad_norm": 0.2943476150778606, "learning_rate": 2.0468992618715855e-06, "loss": 0.1544, "step": 10008 }, { "epoch": 0.7989941725872116, "grad_norm": 0.33445084820906035, "learning_rate": 2.045332214400464e-06, "loss": 0.2309, "step": 10009 }, { "epoch": 0.7990740001596551, "grad_norm": 0.3019429452383207, "learning_rate": 2.0437656986674028e-06, "loss": 0.153, "step": 10010 }, { "epoch": 0.7991538277320986, "grad_norm": 0.288847268235134, "learning_rate": 2.042199714777119e-06, "loss": 0.1841, "step": 10011 }, { "epoch": 0.7992336553045422, "grad_norm": 0.30341061516325263, "learning_rate": 2.0406342628342914e-06, "loss": 0.1459, "step": 10012 }, { "epoch": 0.7993134828769857, "grad_norm": 0.23651447209812368, "learning_rate": 2.0390693429435626e-06, "loss": 0.1184, "step": 10013 }, { "epoch": 0.7993933104494292, "grad_norm": 0.26572727343840535, "learning_rate": 2.037504955209546e-06, "loss": 0.1118, "step": 10014 }, { "epoch": 0.7994731380218728, "grad_norm": 0.28921761798459233, "learning_rate": 2.035941099736811e-06, "loss": 0.1443, "step": 10015 }, { "epoch": 0.7995529655943163, "grad_norm": 0.2841057865920751, "learning_rate": 2.034377776629898e-06, "loss": 0.1127, "step": 10016 }, { "epoch": 0.7996327931667598, "grad_norm": 0.3286876219671292, "learning_rate": 2.0328149859933065e-06, "loss": 0.1685, "step": 10017 }, { "epoch": 0.7997126207392034, "grad_norm": 0.2762084209793208, "learning_rate": 2.031252727931502e-06, "loss": 0.1771, "step": 10018 }, { "epoch": 0.7997924483116469, "grad_norm": 0.25765647940046016, "learning_rate": 2.029691002548918e-06, "loss": 0.1205, "step": 10019 }, { "epoch": 0.7998722758840904, "grad_norm": 0.31629569123491585, "learning_rate": 2.02812980994995e-06, "loss": 0.1141, "step": 10020 }, { "epoch": 0.7999521034565339, "grad_norm": 0.29160069581184717, "learning_rate": 2.026569150238955e-06, "loss": 0.1545, "step": 10021 }, { "epoch": 0.8000319310289774, "grad_norm": 0.2702846313606021, "learning_rate": 2.025009023520257e-06, "loss": 0.1574, "step": 10022 }, { "epoch": 0.8001117586014209, "grad_norm": 0.32340711022477286, "learning_rate": 2.0234494298981468e-06, "loss": 0.1709, "step": 10023 }, { "epoch": 0.8001915861738644, "grad_norm": 0.2872842996414472, "learning_rate": 2.021890369476879e-06, "loss": 0.1857, "step": 10024 }, { "epoch": 0.800271413746308, "grad_norm": 0.26970515090489144, "learning_rate": 2.020331842360661e-06, "loss": 0.1702, "step": 10025 }, { "epoch": 0.8003512413187515, "grad_norm": 0.3172748364070853, "learning_rate": 2.0187738486536833e-06, "loss": 0.2057, "step": 10026 }, { "epoch": 0.800431068891195, "grad_norm": 0.3303505379608419, "learning_rate": 2.017216388460086e-06, "loss": 0.1611, "step": 10027 }, { "epoch": 0.8005108964636385, "grad_norm": 0.31925108417224174, "learning_rate": 2.0156594618839854e-06, "loss": 0.157, "step": 10028 }, { "epoch": 0.8005907240360821, "grad_norm": 0.30087438269747946, "learning_rate": 2.0141030690294517e-06, "loss": 0.1627, "step": 10029 }, { "epoch": 0.8006705516085256, "grad_norm": 0.2720281724362342, "learning_rate": 2.012547210000524e-06, "loss": 0.1964, "step": 10030 }, { "epoch": 0.8007503791809691, "grad_norm": 0.2724159458992173, "learning_rate": 2.0109918849012033e-06, "loss": 0.1643, "step": 10031 }, { "epoch": 0.8008302067534127, "grad_norm": 0.27933090601199856, "learning_rate": 2.0094370938354624e-06, "loss": 0.171, "step": 10032 }, { "epoch": 0.8009100343258562, "grad_norm": 0.33960712412521615, "learning_rate": 2.0078828369072323e-06, "loss": 0.1243, "step": 10033 }, { "epoch": 0.8009898618982997, "grad_norm": 0.28388590217741716, "learning_rate": 2.0063291142204e-06, "loss": 0.1618, "step": 10034 }, { "epoch": 0.8010696894707432, "grad_norm": 0.2884366930088375, "learning_rate": 2.0047759258788357e-06, "loss": 0.1604, "step": 10035 }, { "epoch": 0.8011495170431867, "grad_norm": 0.287892692332985, "learning_rate": 2.0032232719863576e-06, "loss": 0.168, "step": 10036 }, { "epoch": 0.8012293446156302, "grad_norm": 0.2620348457100575, "learning_rate": 2.0016711526467615e-06, "loss": 0.1518, "step": 10037 }, { "epoch": 0.8013091721880737, "grad_norm": 0.26858784330311286, "learning_rate": 2.000119567963792e-06, "loss": 0.1805, "step": 10038 }, { "epoch": 0.8013889997605173, "grad_norm": 0.3099292942828399, "learning_rate": 1.998568518041172e-06, "loss": 0.2024, "step": 10039 }, { "epoch": 0.8014688273329608, "grad_norm": 0.35170844031079224, "learning_rate": 1.9970180029825823e-06, "loss": 0.1849, "step": 10040 }, { "epoch": 0.8015486549054043, "grad_norm": 0.30408311062069676, "learning_rate": 1.995468022891668e-06, "loss": 0.1669, "step": 10041 }, { "epoch": 0.8016284824778479, "grad_norm": 0.3147545617639889, "learning_rate": 1.9939185778720382e-06, "loss": 0.1802, "step": 10042 }, { "epoch": 0.8017083100502914, "grad_norm": 0.2977703327671625, "learning_rate": 1.9923696680272663e-06, "loss": 0.1279, "step": 10043 }, { "epoch": 0.8017881376227349, "grad_norm": 0.34084718761399935, "learning_rate": 1.990821293460895e-06, "loss": 0.1546, "step": 10044 }, { "epoch": 0.8018679651951784, "grad_norm": 0.28544651696225626, "learning_rate": 1.9892734542764214e-06, "loss": 0.2047, "step": 10045 }, { "epoch": 0.801947792767622, "grad_norm": 0.28331862215364373, "learning_rate": 1.9877261505773214e-06, "loss": 0.1389, "step": 10046 }, { "epoch": 0.8020276203400655, "grad_norm": 0.27306925739778587, "learning_rate": 1.9861793824670172e-06, "loss": 0.1396, "step": 10047 }, { "epoch": 0.802107447912509, "grad_norm": 0.275611278239142, "learning_rate": 1.9846331500489045e-06, "loss": 0.117, "step": 10048 }, { "epoch": 0.8021872754849525, "grad_norm": 0.3617950757681822, "learning_rate": 1.9830874534263488e-06, "loss": 0.1585, "step": 10049 }, { "epoch": 0.802267103057396, "grad_norm": 0.3571867651500153, "learning_rate": 1.9815422927026707e-06, "loss": 0.1359, "step": 10050 }, { "epoch": 0.8023469306298395, "grad_norm": 0.3170964306449361, "learning_rate": 1.979997667981157e-06, "loss": 0.1313, "step": 10051 }, { "epoch": 0.802426758202283, "grad_norm": 0.33124253571022017, "learning_rate": 1.978453579365058e-06, "loss": 0.1521, "step": 10052 }, { "epoch": 0.8025065857747266, "grad_norm": 0.2720819937258062, "learning_rate": 1.976910026957596e-06, "loss": 0.1852, "step": 10053 }, { "epoch": 0.8025864133471701, "grad_norm": 0.3177411663954477, "learning_rate": 1.9753670108619472e-06, "loss": 0.2076, "step": 10054 }, { "epoch": 0.8026662409196136, "grad_norm": 0.31139405613903015, "learning_rate": 1.9738245311812576e-06, "loss": 0.1154, "step": 10055 }, { "epoch": 0.8027460684920572, "grad_norm": 0.30540753940244114, "learning_rate": 1.972282588018635e-06, "loss": 0.1328, "step": 10056 }, { "epoch": 0.8028258960645007, "grad_norm": 0.2917580330496759, "learning_rate": 1.970741181477149e-06, "loss": 0.1223, "step": 10057 }, { "epoch": 0.8029057236369442, "grad_norm": 0.33201363755331803, "learning_rate": 1.9692003116598434e-06, "loss": 0.2231, "step": 10058 }, { "epoch": 0.8029855512093877, "grad_norm": 0.2907441230734711, "learning_rate": 1.9676599786697148e-06, "loss": 0.1105, "step": 10059 }, { "epoch": 0.8030653787818313, "grad_norm": 0.27836564081185605, "learning_rate": 1.9661201826097297e-06, "loss": 0.1213, "step": 10060 }, { "epoch": 0.8031452063542748, "grad_norm": 0.2667708597976178, "learning_rate": 1.964580923582814e-06, "loss": 0.2143, "step": 10061 }, { "epoch": 0.8032250339267183, "grad_norm": 0.2965537881368753, "learning_rate": 1.9630422016918673e-06, "loss": 0.198, "step": 10062 }, { "epoch": 0.8033048614991618, "grad_norm": 0.28507554365706483, "learning_rate": 1.961504017039744e-06, "loss": 0.1393, "step": 10063 }, { "epoch": 0.8033846890716053, "grad_norm": 0.2875956172898768, "learning_rate": 1.9599663697292638e-06, "loss": 0.1784, "step": 10064 }, { "epoch": 0.8034645166440488, "grad_norm": 0.2991917465979626, "learning_rate": 1.9584292598632148e-06, "loss": 0.1513, "step": 10065 }, { "epoch": 0.8035443442164923, "grad_norm": 0.2721920743442469, "learning_rate": 1.9568926875443437e-06, "loss": 0.15, "step": 10066 }, { "epoch": 0.8036241717889359, "grad_norm": 0.3096028487444136, "learning_rate": 1.9553566528753686e-06, "loss": 0.1983, "step": 10067 }, { "epoch": 0.8037039993613794, "grad_norm": 0.2610285365788423, "learning_rate": 1.953821155958966e-06, "loss": 0.1361, "step": 10068 }, { "epoch": 0.803783826933823, "grad_norm": 0.29057112915741007, "learning_rate": 1.952286196897777e-06, "loss": 0.1427, "step": 10069 }, { "epoch": 0.8038636545062665, "grad_norm": 0.24249260339481005, "learning_rate": 1.950751775794406e-06, "loss": 0.1907, "step": 10070 }, { "epoch": 0.80394348207871, "grad_norm": 0.2974663862382265, "learning_rate": 1.9492178927514237e-06, "loss": 0.2151, "step": 10071 }, { "epoch": 0.8040233096511535, "grad_norm": 0.33188577731191354, "learning_rate": 1.947684547871367e-06, "loss": 0.1398, "step": 10072 }, { "epoch": 0.804103137223597, "grad_norm": 0.27391447409219183, "learning_rate": 1.946151741256731e-06, "loss": 0.187, "step": 10073 }, { "epoch": 0.8041829647960406, "grad_norm": 0.2732931967829425, "learning_rate": 1.9446194730099787e-06, "loss": 0.1644, "step": 10074 }, { "epoch": 0.8042627923684841, "grad_norm": 0.30421631699568846, "learning_rate": 1.943087743233535e-06, "loss": 0.1571, "step": 10075 }, { "epoch": 0.8043426199409276, "grad_norm": 0.2724096812083314, "learning_rate": 1.941556552029792e-06, "loss": 0.1594, "step": 10076 }, { "epoch": 0.8044224475133711, "grad_norm": 0.29062563993729956, "learning_rate": 1.940025899501107e-06, "loss": 0.184, "step": 10077 }, { "epoch": 0.8045022750858146, "grad_norm": 0.2990825293575257, "learning_rate": 1.9384957857497864e-06, "loss": 0.1416, "step": 10078 }, { "epoch": 0.8045821026582581, "grad_norm": 0.28833438918781745, "learning_rate": 1.936966210878124e-06, "loss": 0.1849, "step": 10079 }, { "epoch": 0.8046619302307016, "grad_norm": 0.2651370791944633, "learning_rate": 1.9354371749883592e-06, "loss": 0.1302, "step": 10080 }, { "epoch": 0.8047417578031452, "grad_norm": 0.27711353158221796, "learning_rate": 1.933908678182709e-06, "loss": 0.1999, "step": 10081 }, { "epoch": 0.8048215853755887, "grad_norm": 0.26235885396109787, "learning_rate": 1.9323807205633373e-06, "loss": 0.139, "step": 10082 }, { "epoch": 0.8049014129480323, "grad_norm": 0.28393941522169314, "learning_rate": 1.9308533022323905e-06, "loss": 0.1482, "step": 10083 }, { "epoch": 0.8049812405204758, "grad_norm": 0.28638494437654105, "learning_rate": 1.9293264232919673e-06, "loss": 0.1339, "step": 10084 }, { "epoch": 0.8050610680929193, "grad_norm": 0.30236955640664737, "learning_rate": 1.9278000838441335e-06, "loss": 0.1691, "step": 10085 }, { "epoch": 0.8051408956653628, "grad_norm": 0.28572018251710296, "learning_rate": 1.9262742839909198e-06, "loss": 0.2075, "step": 10086 }, { "epoch": 0.8052207232378064, "grad_norm": 0.2872520315046563, "learning_rate": 1.9247490238343156e-06, "loss": 0.1584, "step": 10087 }, { "epoch": 0.8053005508102499, "grad_norm": 0.2836562179165345, "learning_rate": 1.9232243034762843e-06, "loss": 0.1426, "step": 10088 }, { "epoch": 0.8053803783826934, "grad_norm": 0.29666995684467484, "learning_rate": 1.921700123018746e-06, "loss": 0.1579, "step": 10089 }, { "epoch": 0.8054602059551369, "grad_norm": 0.29802251280713027, "learning_rate": 1.920176482563585e-06, "loss": 0.1809, "step": 10090 }, { "epoch": 0.8055400335275804, "grad_norm": 0.2827980710072831, "learning_rate": 1.9186533822126506e-06, "loss": 0.1365, "step": 10091 }, { "epoch": 0.8056198611000239, "grad_norm": 0.33100623289036263, "learning_rate": 1.917130822067753e-06, "loss": 0.1937, "step": 10092 }, { "epoch": 0.8056996886724674, "grad_norm": 0.2903217125621076, "learning_rate": 1.915608802230676e-06, "loss": 0.1316, "step": 10093 }, { "epoch": 0.805779516244911, "grad_norm": 0.2585889255815859, "learning_rate": 1.9140873228031575e-06, "loss": 0.2002, "step": 10094 }, { "epoch": 0.8058593438173545, "grad_norm": 0.27638983026637437, "learning_rate": 1.9125663838869026e-06, "loss": 0.1265, "step": 10095 }, { "epoch": 0.805939171389798, "grad_norm": 0.26275863896442175, "learning_rate": 1.9110459855835763e-06, "loss": 0.1441, "step": 10096 }, { "epoch": 0.8060189989622416, "grad_norm": 0.30321708372957895, "learning_rate": 1.909526127994816e-06, "loss": 0.1609, "step": 10097 }, { "epoch": 0.8060988265346851, "grad_norm": 0.2884074311576218, "learning_rate": 1.9080068112222183e-06, "loss": 0.1514, "step": 10098 }, { "epoch": 0.8061786541071286, "grad_norm": 0.33157842305881347, "learning_rate": 1.906488035367341e-06, "loss": 0.1403, "step": 10099 }, { "epoch": 0.8062584816795721, "grad_norm": 0.3187865445767317, "learning_rate": 1.9049698005317085e-06, "loss": 0.1202, "step": 10100 }, { "epoch": 0.8063383092520157, "grad_norm": 0.3299886636322048, "learning_rate": 1.9034521068168076e-06, "loss": 0.1622, "step": 10101 }, { "epoch": 0.8064181368244592, "grad_norm": 0.2464605230568616, "learning_rate": 1.901934954324095e-06, "loss": 0.1455, "step": 10102 }, { "epoch": 0.8064979643969027, "grad_norm": 0.29010015828663993, "learning_rate": 1.9004183431549827e-06, "loss": 0.164, "step": 10103 }, { "epoch": 0.8065777919693462, "grad_norm": 0.2955210134191255, "learning_rate": 1.8989022734108498e-06, "loss": 0.1621, "step": 10104 }, { "epoch": 0.8066576195417897, "grad_norm": 0.3474593741139067, "learning_rate": 1.8973867451930394e-06, "loss": 0.1993, "step": 10105 }, { "epoch": 0.8067374471142332, "grad_norm": 0.2741596522547288, "learning_rate": 1.8958717586028608e-06, "loss": 0.1473, "step": 10106 }, { "epoch": 0.8068172746866767, "grad_norm": 0.26170398171280496, "learning_rate": 1.8943573137415848e-06, "loss": 0.1613, "step": 10107 }, { "epoch": 0.8068971022591203, "grad_norm": 0.2798674392657088, "learning_rate": 1.8928434107104443e-06, "loss": 0.1366, "step": 10108 }, { "epoch": 0.8069769298315638, "grad_norm": 0.31979214735216827, "learning_rate": 1.891330049610638e-06, "loss": 0.1677, "step": 10109 }, { "epoch": 0.8070567574040074, "grad_norm": 0.2800513485221998, "learning_rate": 1.889817230543327e-06, "loss": 0.1139, "step": 10110 }, { "epoch": 0.8071365849764509, "grad_norm": 0.3177273489486993, "learning_rate": 1.888304953609641e-06, "loss": 0.1511, "step": 10111 }, { "epoch": 0.8072164125488944, "grad_norm": 0.2709196745157037, "learning_rate": 1.8867932189106663e-06, "loss": 0.1408, "step": 10112 }, { "epoch": 0.8072962401213379, "grad_norm": 0.29646858736655424, "learning_rate": 1.8852820265474592e-06, "loss": 0.1664, "step": 10113 }, { "epoch": 0.8073760676937815, "grad_norm": 0.3037429589049539, "learning_rate": 1.8837713766210342e-06, "loss": 0.1404, "step": 10114 }, { "epoch": 0.807455895266225, "grad_norm": 0.3155711341721322, "learning_rate": 1.8822612692323705e-06, "loss": 0.1416, "step": 10115 }, { "epoch": 0.8075357228386685, "grad_norm": 0.28674690189980706, "learning_rate": 1.8807517044824219e-06, "loss": 0.1891, "step": 10116 }, { "epoch": 0.807615550411112, "grad_norm": 0.29730348633502746, "learning_rate": 1.8792426824720845e-06, "loss": 0.1922, "step": 10117 }, { "epoch": 0.8076953779835555, "grad_norm": 0.3810379681251317, "learning_rate": 1.8777342033022395e-06, "loss": 0.1648, "step": 10118 }, { "epoch": 0.807775205555999, "grad_norm": 0.30549818561478664, "learning_rate": 1.8762262670737165e-06, "loss": 0.1311, "step": 10119 }, { "epoch": 0.8078550331284425, "grad_norm": 0.2587869520689671, "learning_rate": 1.8747188738873223e-06, "loss": 0.1034, "step": 10120 }, { "epoch": 0.807934860700886, "grad_norm": 0.29901227782092266, "learning_rate": 1.8732120238438178e-06, "loss": 0.1696, "step": 10121 }, { "epoch": 0.8080146882733296, "grad_norm": 0.2919464433691424, "learning_rate": 1.8717057170439234e-06, "loss": 0.1354, "step": 10122 }, { "epoch": 0.8080945158457731, "grad_norm": 0.239684451991935, "learning_rate": 1.8701999535883375e-06, "loss": 0.1671, "step": 10123 }, { "epoch": 0.8081743434182167, "grad_norm": 0.25740707524972295, "learning_rate": 1.8686947335777083e-06, "loss": 0.1134, "step": 10124 }, { "epoch": 0.8082541709906602, "grad_norm": 0.288612967131544, "learning_rate": 1.8671900571126634e-06, "loss": 0.1748, "step": 10125 }, { "epoch": 0.8083339985631037, "grad_norm": 0.33525622809706074, "learning_rate": 1.865685924293772e-06, "loss": 0.1637, "step": 10126 }, { "epoch": 0.8084138261355472, "grad_norm": 0.3361294848114171, "learning_rate": 1.8641823352215894e-06, "loss": 0.1723, "step": 10127 }, { "epoch": 0.8084936537079908, "grad_norm": 0.2651391118033792, "learning_rate": 1.8626792899966174e-06, "loss": 0.1226, "step": 10128 }, { "epoch": 0.8085734812804343, "grad_norm": 0.257373703515457, "learning_rate": 1.8611767887193365e-06, "loss": 0.1102, "step": 10129 }, { "epoch": 0.8086533088528778, "grad_norm": 0.2775687623932892, "learning_rate": 1.859674831490177e-06, "loss": 0.1503, "step": 10130 }, { "epoch": 0.8087331364253213, "grad_norm": 0.266927895576651, "learning_rate": 1.8581734184095379e-06, "loss": 0.1537, "step": 10131 }, { "epoch": 0.8088129639977648, "grad_norm": 0.2830033142655006, "learning_rate": 1.856672549577787e-06, "loss": 0.1605, "step": 10132 }, { "epoch": 0.8088927915702083, "grad_norm": 0.2905598111748514, "learning_rate": 1.855172225095251e-06, "loss": 0.1391, "step": 10133 }, { "epoch": 0.8089726191426518, "grad_norm": 0.30672260718867345, "learning_rate": 1.853672445062218e-06, "loss": 0.1561, "step": 10134 }, { "epoch": 0.8090524467150954, "grad_norm": 0.32402185460619826, "learning_rate": 1.8521732095789412e-06, "loss": 0.1629, "step": 10135 }, { "epoch": 0.8091322742875389, "grad_norm": 0.2976391146437789, "learning_rate": 1.8506745187456431e-06, "loss": 0.1787, "step": 10136 }, { "epoch": 0.8092121018599825, "grad_norm": 0.28022270291893675, "learning_rate": 1.8491763726625033e-06, "loss": 0.1637, "step": 10137 }, { "epoch": 0.809291929432426, "grad_norm": 0.3562201722942162, "learning_rate": 1.8476787714296673e-06, "loss": 0.1522, "step": 10138 }, { "epoch": 0.8093717570048695, "grad_norm": 0.269303721412098, "learning_rate": 1.8461817151472428e-06, "loss": 0.1665, "step": 10139 }, { "epoch": 0.809451584577313, "grad_norm": 0.3074986572213395, "learning_rate": 1.8446852039153007e-06, "loss": 0.1416, "step": 10140 }, { "epoch": 0.8095314121497565, "grad_norm": 0.2703212778207184, "learning_rate": 1.84318923783388e-06, "loss": 0.1667, "step": 10141 }, { "epoch": 0.8096112397222001, "grad_norm": 0.26465880443337436, "learning_rate": 1.8416938170029796e-06, "loss": 0.1525, "step": 10142 }, { "epoch": 0.8096910672946436, "grad_norm": 0.2620720876717731, "learning_rate": 1.8401989415225618e-06, "loss": 0.1901, "step": 10143 }, { "epoch": 0.8097708948670871, "grad_norm": 0.29105807211681806, "learning_rate": 1.8387046114925522e-06, "loss": 0.1323, "step": 10144 }, { "epoch": 0.8098507224395306, "grad_norm": 0.2972955170687768, "learning_rate": 1.8372108270128397e-06, "loss": 0.1785, "step": 10145 }, { "epoch": 0.8099305500119741, "grad_norm": 0.33098065182141007, "learning_rate": 1.8357175881832822e-06, "loss": 0.2302, "step": 10146 }, { "epoch": 0.8100103775844176, "grad_norm": 0.33029187209822886, "learning_rate": 1.8342248951036946e-06, "loss": 0.1649, "step": 10147 }, { "epoch": 0.8100902051568611, "grad_norm": 0.2894969560343468, "learning_rate": 1.8327327478738578e-06, "loss": 0.1155, "step": 10148 }, { "epoch": 0.8101700327293047, "grad_norm": 0.30309108604556095, "learning_rate": 1.831241146593512e-06, "loss": 0.1758, "step": 10149 }, { "epoch": 0.8102498603017482, "grad_norm": 0.2907825362396078, "learning_rate": 1.8297500913623712e-06, "loss": 0.1539, "step": 10150 }, { "epoch": 0.8103296878741918, "grad_norm": 0.3124707448768223, "learning_rate": 1.828259582280103e-06, "loss": 0.1312, "step": 10151 }, { "epoch": 0.8104095154466353, "grad_norm": 0.2801160240749937, "learning_rate": 1.8267696194463436e-06, "loss": 0.1559, "step": 10152 }, { "epoch": 0.8104893430190788, "grad_norm": 0.28670886216742275, "learning_rate": 1.8252802029606898e-06, "loss": 0.1711, "step": 10153 }, { "epoch": 0.8105691705915223, "grad_norm": 0.2358917012001919, "learning_rate": 1.823791332922702e-06, "loss": 0.1591, "step": 10154 }, { "epoch": 0.8106489981639659, "grad_norm": 0.3309656790321743, "learning_rate": 1.8223030094319084e-06, "loss": 0.214, "step": 10155 }, { "epoch": 0.8107288257364094, "grad_norm": 0.3324917548696888, "learning_rate": 1.820815232587796e-06, "loss": 0.1516, "step": 10156 }, { "epoch": 0.8108086533088529, "grad_norm": 0.2629922686408823, "learning_rate": 1.8193280024898174e-06, "loss": 0.1313, "step": 10157 }, { "epoch": 0.8108884808812964, "grad_norm": 0.29435310846737917, "learning_rate": 1.8178413192373868e-06, "loss": 0.1355, "step": 10158 }, { "epoch": 0.8109683084537399, "grad_norm": 0.30676303300255875, "learning_rate": 1.816355182929882e-06, "loss": 0.1224, "step": 10159 }, { "epoch": 0.8110481360261834, "grad_norm": 0.2793726844093054, "learning_rate": 1.8148695936666517e-06, "loss": 0.1797, "step": 10160 }, { "epoch": 0.8111279635986269, "grad_norm": 0.28604766909941315, "learning_rate": 1.8133845515469917e-06, "loss": 0.1678, "step": 10161 }, { "epoch": 0.8112077911710704, "grad_norm": 0.2620874783102417, "learning_rate": 1.8119000566701794e-06, "loss": 0.1209, "step": 10162 }, { "epoch": 0.811287618743514, "grad_norm": 0.2913252859896367, "learning_rate": 1.8104161091354422e-06, "loss": 0.1321, "step": 10163 }, { "epoch": 0.8113674463159576, "grad_norm": 0.2901247159281618, "learning_rate": 1.8089327090419806e-06, "loss": 0.1954, "step": 10164 }, { "epoch": 0.8114472738884011, "grad_norm": 0.26813004004425844, "learning_rate": 1.807449856488952e-06, "loss": 0.1688, "step": 10165 }, { "epoch": 0.8115271014608446, "grad_norm": 0.26914569712848935, "learning_rate": 1.8059675515754793e-06, "loss": 0.1965, "step": 10166 }, { "epoch": 0.8116069290332881, "grad_norm": 0.2898438674191991, "learning_rate": 1.8044857944006489e-06, "loss": 0.1308, "step": 10167 }, { "epoch": 0.8116867566057316, "grad_norm": 0.2792224927894691, "learning_rate": 1.8030045850635069e-06, "loss": 0.165, "step": 10168 }, { "epoch": 0.8117665841781752, "grad_norm": 0.2740728171064863, "learning_rate": 1.8015239236630744e-06, "loss": 0.1575, "step": 10169 }, { "epoch": 0.8118464117506187, "grad_norm": 0.2864886397184259, "learning_rate": 1.8000438102983176e-06, "loss": 0.1391, "step": 10170 }, { "epoch": 0.8119262393230622, "grad_norm": 0.2805408242289046, "learning_rate": 1.7985642450681851e-06, "loss": 0.1563, "step": 10171 }, { "epoch": 0.8120060668955057, "grad_norm": 0.2939905783413235, "learning_rate": 1.7970852280715723e-06, "loss": 0.1718, "step": 10172 }, { "epoch": 0.8120858944679492, "grad_norm": 0.32245340152750107, "learning_rate": 1.7956067594073556e-06, "loss": 0.1643, "step": 10173 }, { "epoch": 0.8121657220403927, "grad_norm": 0.2663627029305846, "learning_rate": 1.794128839174356e-06, "loss": 0.1156, "step": 10174 }, { "epoch": 0.8122455496128362, "grad_norm": 0.24632434926530453, "learning_rate": 1.7926514674713669e-06, "loss": 0.2048, "step": 10175 }, { "epoch": 0.8123253771852798, "grad_norm": 0.2918735507423593, "learning_rate": 1.791174644397149e-06, "loss": 0.1205, "step": 10176 }, { "epoch": 0.8124052047577233, "grad_norm": 0.3164852474794669, "learning_rate": 1.7896983700504211e-06, "loss": 0.1624, "step": 10177 }, { "epoch": 0.8124850323301669, "grad_norm": 0.2749100093246223, "learning_rate": 1.7882226445298656e-06, "loss": 0.1403, "step": 10178 }, { "epoch": 0.8125648599026104, "grad_norm": 0.26634701697998947, "learning_rate": 1.7867474679341256e-06, "loss": 0.1934, "step": 10179 }, { "epoch": 0.8126446874750539, "grad_norm": 0.30027938138359345, "learning_rate": 1.785272840361818e-06, "loss": 0.1589, "step": 10180 }, { "epoch": 0.8127245150474974, "grad_norm": 0.3126801746500934, "learning_rate": 1.7837987619115104e-06, "loss": 0.1366, "step": 10181 }, { "epoch": 0.812804342619941, "grad_norm": 0.2970061273660153, "learning_rate": 1.782325232681741e-06, "loss": 0.1611, "step": 10182 }, { "epoch": 0.8128841701923845, "grad_norm": 0.2714557434278598, "learning_rate": 1.7808522527710092e-06, "loss": 0.1707, "step": 10183 }, { "epoch": 0.812963997764828, "grad_norm": 0.24522199842579065, "learning_rate": 1.779379822277776e-06, "loss": 0.1657, "step": 10184 }, { "epoch": 0.8130438253372715, "grad_norm": 0.3111020097845066, "learning_rate": 1.7779079413004708e-06, "loss": 0.1576, "step": 10185 }, { "epoch": 0.813123652909715, "grad_norm": 0.31164522829755165, "learning_rate": 1.7764366099374808e-06, "loss": 0.1692, "step": 10186 }, { "epoch": 0.8132034804821585, "grad_norm": 0.28839840431877184, "learning_rate": 1.7749658282871608e-06, "loss": 0.151, "step": 10187 }, { "epoch": 0.813283308054602, "grad_norm": 0.26247608053937854, "learning_rate": 1.773495596447824e-06, "loss": 0.1419, "step": 10188 }, { "epoch": 0.8133631356270455, "grad_norm": 0.3310785891351608, "learning_rate": 1.7720259145177488e-06, "loss": 0.204, "step": 10189 }, { "epoch": 0.813442963199489, "grad_norm": 0.30382415532839574, "learning_rate": 1.7705567825951819e-06, "loss": 0.1521, "step": 10190 }, { "epoch": 0.8135227907719327, "grad_norm": 0.3450708579572944, "learning_rate": 1.7690882007783272e-06, "loss": 0.1921, "step": 10191 }, { "epoch": 0.8136026183443762, "grad_norm": 0.29904854829270033, "learning_rate": 1.7676201691653527e-06, "loss": 0.1648, "step": 10192 }, { "epoch": 0.8136824459168197, "grad_norm": 0.27168822693490696, "learning_rate": 1.766152687854389e-06, "loss": 0.151, "step": 10193 }, { "epoch": 0.8137622734892632, "grad_norm": 0.272958792438851, "learning_rate": 1.764685756943535e-06, "loss": 0.1944, "step": 10194 }, { "epoch": 0.8138421010617067, "grad_norm": 0.2643814402624619, "learning_rate": 1.7632193765308469e-06, "loss": 0.1716, "step": 10195 }, { "epoch": 0.8139219286341502, "grad_norm": 0.27850005949534684, "learning_rate": 1.7617535467143487e-06, "loss": 0.1332, "step": 10196 }, { "epoch": 0.8140017562065938, "grad_norm": 0.3033483001865623, "learning_rate": 1.7602882675920229e-06, "loss": 0.1398, "step": 10197 }, { "epoch": 0.8140815837790373, "grad_norm": 0.2897397966443543, "learning_rate": 1.758823539261816e-06, "loss": 0.165, "step": 10198 }, { "epoch": 0.8141614113514808, "grad_norm": 0.2661597311540926, "learning_rate": 1.7573593618216444e-06, "loss": 0.1439, "step": 10199 }, { "epoch": 0.8142412389239243, "grad_norm": 0.29158392690905477, "learning_rate": 1.7558957353693795e-06, "loss": 0.1607, "step": 10200 }, { "epoch": 0.8143210664963678, "grad_norm": 0.34303495253935795, "learning_rate": 1.7544326600028594e-06, "loss": 0.1335, "step": 10201 }, { "epoch": 0.8144008940688113, "grad_norm": 0.29301498842580737, "learning_rate": 1.7529701358198825e-06, "loss": 0.132, "step": 10202 }, { "epoch": 0.8144807216412548, "grad_norm": 0.2931141325042629, "learning_rate": 1.7515081629182174e-06, "loss": 0.1873, "step": 10203 }, { "epoch": 0.8145605492136984, "grad_norm": 0.2853511772181161, "learning_rate": 1.7500467413955913e-06, "loss": 0.1871, "step": 10204 }, { "epoch": 0.814640376786142, "grad_norm": 0.2764499370823019, "learning_rate": 1.748585871349686e-06, "loss": 0.1371, "step": 10205 }, { "epoch": 0.8147202043585855, "grad_norm": 0.29032012228996706, "learning_rate": 1.747125552878165e-06, "loss": 0.1699, "step": 10206 }, { "epoch": 0.814800031931029, "grad_norm": 0.3122934225182903, "learning_rate": 1.7456657860786375e-06, "loss": 0.1447, "step": 10207 }, { "epoch": 0.8148798595034725, "grad_norm": 0.24785590266797589, "learning_rate": 1.744206571048691e-06, "loss": 0.1844, "step": 10208 }, { "epoch": 0.814959687075916, "grad_norm": 0.263901370040401, "learning_rate": 1.742747907885859e-06, "loss": 0.2161, "step": 10209 }, { "epoch": 0.8150395146483596, "grad_norm": 0.32844699099599645, "learning_rate": 1.7412897966876542e-06, "loss": 0.1887, "step": 10210 }, { "epoch": 0.8151193422208031, "grad_norm": 0.29566919985377305, "learning_rate": 1.7398322375515432e-06, "loss": 0.1765, "step": 10211 }, { "epoch": 0.8151991697932466, "grad_norm": 0.309512834501927, "learning_rate": 1.7383752305749557e-06, "loss": 0.1131, "step": 10212 }, { "epoch": 0.8152789973656901, "grad_norm": 0.28879229294799896, "learning_rate": 1.7369187758552952e-06, "loss": 0.1634, "step": 10213 }, { "epoch": 0.8153588249381336, "grad_norm": 0.2655764096077423, "learning_rate": 1.7354628734899082e-06, "loss": 0.1382, "step": 10214 }, { "epoch": 0.8154386525105771, "grad_norm": 0.28242304653481703, "learning_rate": 1.7340075235761244e-06, "loss": 0.1451, "step": 10215 }, { "epoch": 0.8155184800830206, "grad_norm": 0.2899325636668609, "learning_rate": 1.7325527262112229e-06, "loss": 0.1472, "step": 10216 }, { "epoch": 0.8155983076554641, "grad_norm": 0.2929470583348262, "learning_rate": 1.7310984814924592e-06, "loss": 0.1487, "step": 10217 }, { "epoch": 0.8156781352279078, "grad_norm": 0.33216288760025764, "learning_rate": 1.7296447895170353e-06, "loss": 0.1878, "step": 10218 }, { "epoch": 0.8157579628003513, "grad_norm": 0.3488610686640488, "learning_rate": 1.728191650382126e-06, "loss": 0.1245, "step": 10219 }, { "epoch": 0.8158377903727948, "grad_norm": 0.2768692628725692, "learning_rate": 1.7267390641848725e-06, "loss": 0.1451, "step": 10220 }, { "epoch": 0.8159176179452383, "grad_norm": 0.306184647208325, "learning_rate": 1.7252870310223724e-06, "loss": 0.1373, "step": 10221 }, { "epoch": 0.8159974455176818, "grad_norm": 0.2932504594413976, "learning_rate": 1.723835550991686e-06, "loss": 0.0939, "step": 10222 }, { "epoch": 0.8160772730901253, "grad_norm": 0.2797901922980283, "learning_rate": 1.7223846241898401e-06, "loss": 0.1617, "step": 10223 }, { "epoch": 0.8161571006625689, "grad_norm": 0.3430115701324072, "learning_rate": 1.720934250713825e-06, "loss": 0.1667, "step": 10224 }, { "epoch": 0.8162369282350124, "grad_norm": 0.2900229224889009, "learning_rate": 1.7194844306605928e-06, "loss": 0.1568, "step": 10225 }, { "epoch": 0.8163167558074559, "grad_norm": 0.23662088094692518, "learning_rate": 1.7180351641270553e-06, "loss": 0.1612, "step": 10226 }, { "epoch": 0.8163965833798994, "grad_norm": 0.266731999202348, "learning_rate": 1.716586451210094e-06, "loss": 0.1405, "step": 10227 }, { "epoch": 0.8164764109523429, "grad_norm": 0.24153724780281807, "learning_rate": 1.715138292006543e-06, "loss": 0.1408, "step": 10228 }, { "epoch": 0.8165562385247864, "grad_norm": 0.2801698419021368, "learning_rate": 1.713690686613214e-06, "loss": 0.1563, "step": 10229 }, { "epoch": 0.8166360660972299, "grad_norm": 0.28154253926407635, "learning_rate": 1.7122436351268702e-06, "loss": 0.1504, "step": 10230 }, { "epoch": 0.8167158936696735, "grad_norm": 0.29961814061710673, "learning_rate": 1.710797137644241e-06, "loss": 0.1627, "step": 10231 }, { "epoch": 0.8167957212421171, "grad_norm": 0.2911672679220243, "learning_rate": 1.7093511942620166e-06, "loss": 0.1795, "step": 10232 }, { "epoch": 0.8168755488145606, "grad_norm": 0.3026789537209397, "learning_rate": 1.7079058050768583e-06, "loss": 0.2001, "step": 10233 }, { "epoch": 0.8169553763870041, "grad_norm": 0.3022462497027266, "learning_rate": 1.7064609701853808e-06, "loss": 0.1342, "step": 10234 }, { "epoch": 0.8170352039594476, "grad_norm": 0.29331790176989864, "learning_rate": 1.705016689684167e-06, "loss": 0.1571, "step": 10235 }, { "epoch": 0.8171150315318911, "grad_norm": 0.2819449912022558, "learning_rate": 1.7035729636697608e-06, "loss": 0.1509, "step": 10236 }, { "epoch": 0.8171948591043346, "grad_norm": 0.27016754275047794, "learning_rate": 1.702129792238666e-06, "loss": 0.124, "step": 10237 }, { "epoch": 0.8172746866767782, "grad_norm": 0.30710293538175243, "learning_rate": 1.70068717548736e-06, "loss": 0.2235, "step": 10238 }, { "epoch": 0.8173545142492217, "grad_norm": 0.2867433384842238, "learning_rate": 1.6992451135122722e-06, "loss": 0.1613, "step": 10239 }, { "epoch": 0.8174343418216652, "grad_norm": 0.2480659967118185, "learning_rate": 1.6978036064097992e-06, "loss": 0.1346, "step": 10240 }, { "epoch": 0.8175141693941087, "grad_norm": 0.357932351111051, "learning_rate": 1.696362654276299e-06, "loss": 0.1392, "step": 10241 }, { "epoch": 0.8175939969665522, "grad_norm": 0.2885596407925222, "learning_rate": 1.6949222572080925e-06, "loss": 0.1895, "step": 10242 }, { "epoch": 0.8176738245389957, "grad_norm": 0.2567202667680724, "learning_rate": 1.6934824153014684e-06, "loss": 0.1494, "step": 10243 }, { "epoch": 0.8177536521114392, "grad_norm": 0.29160055642952865, "learning_rate": 1.6920431286526728e-06, "loss": 0.1567, "step": 10244 }, { "epoch": 0.8178334796838828, "grad_norm": 0.2889934287455499, "learning_rate": 1.6906043973579167e-06, "loss": 0.1401, "step": 10245 }, { "epoch": 0.8179133072563264, "grad_norm": 0.2531927954201227, "learning_rate": 1.6891662215133698e-06, "loss": 0.1944, "step": 10246 }, { "epoch": 0.8179931348287699, "grad_norm": 0.2955918984190053, "learning_rate": 1.6877286012151729e-06, "loss": 0.1141, "step": 10247 }, { "epoch": 0.8180729624012134, "grad_norm": 0.31195475381042764, "learning_rate": 1.6862915365594278e-06, "loss": 0.1765, "step": 10248 }, { "epoch": 0.8181527899736569, "grad_norm": 0.2931284148014233, "learning_rate": 1.6848550276421871e-06, "loss": 0.1653, "step": 10249 }, { "epoch": 0.8182326175461004, "grad_norm": 0.34041493632012954, "learning_rate": 1.6834190745594837e-06, "loss": 0.189, "step": 10250 }, { "epoch": 0.818312445118544, "grad_norm": 0.2989862940181303, "learning_rate": 1.6819836774073006e-06, "loss": 0.1491, "step": 10251 }, { "epoch": 0.8183922726909875, "grad_norm": 0.29698135542786763, "learning_rate": 1.6805488362815958e-06, "loss": 0.1628, "step": 10252 }, { "epoch": 0.818472100263431, "grad_norm": 0.35332201238015526, "learning_rate": 1.6791145512782724e-06, "loss": 0.1469, "step": 10253 }, { "epoch": 0.8185519278358745, "grad_norm": 0.30456008984609284, "learning_rate": 1.6776808224932152e-06, "loss": 0.142, "step": 10254 }, { "epoch": 0.818631755408318, "grad_norm": 0.2755563252207763, "learning_rate": 1.6762476500222592e-06, "loss": 0.1565, "step": 10255 }, { "epoch": 0.8187115829807615, "grad_norm": 0.29615761961568304, "learning_rate": 1.6748150339612046e-06, "loss": 0.1151, "step": 10256 }, { "epoch": 0.818791410553205, "grad_norm": 0.3039859465248065, "learning_rate": 1.6733829744058239e-06, "loss": 0.1301, "step": 10257 }, { "epoch": 0.8188712381256485, "grad_norm": 0.2574264707664171, "learning_rate": 1.6719514714518348e-06, "loss": 0.181, "step": 10258 }, { "epoch": 0.8189510656980922, "grad_norm": 0.2838836451126828, "learning_rate": 1.6705205251949353e-06, "loss": 0.1383, "step": 10259 }, { "epoch": 0.8190308932705357, "grad_norm": 0.29460088965030723, "learning_rate": 1.669090135730772e-06, "loss": 0.1771, "step": 10260 }, { "epoch": 0.8191107208429792, "grad_norm": 0.248216681960307, "learning_rate": 1.6676603031549698e-06, "loss": 0.1913, "step": 10261 }, { "epoch": 0.8191905484154227, "grad_norm": 0.31892386229552766, "learning_rate": 1.6662310275630955e-06, "loss": 0.158, "step": 10262 }, { "epoch": 0.8192703759878662, "grad_norm": 0.35041701774029843, "learning_rate": 1.6648023090507004e-06, "loss": 0.1901, "step": 10263 }, { "epoch": 0.8193502035603097, "grad_norm": 0.2708822149627461, "learning_rate": 1.6633741477132847e-06, "loss": 0.1639, "step": 10264 }, { "epoch": 0.8194300311327533, "grad_norm": 0.2679049622791409, "learning_rate": 1.6619465436463157e-06, "loss": 0.162, "step": 10265 }, { "epoch": 0.8195098587051968, "grad_norm": 0.27426194721399233, "learning_rate": 1.6605194969452231e-06, "loss": 0.1509, "step": 10266 }, { "epoch": 0.8195896862776403, "grad_norm": 0.29548174755644724, "learning_rate": 1.6590930077053968e-06, "loss": 0.1613, "step": 10267 }, { "epoch": 0.8196695138500838, "grad_norm": 0.36385996128812276, "learning_rate": 1.657667076022197e-06, "loss": 0.1701, "step": 10268 }, { "epoch": 0.8197493414225273, "grad_norm": 0.2945119493581074, "learning_rate": 1.6562417019909393e-06, "loss": 0.129, "step": 10269 }, { "epoch": 0.8198291689949708, "grad_norm": 0.24536265244848235, "learning_rate": 1.6548168857069036e-06, "loss": 0.1717, "step": 10270 }, { "epoch": 0.8199089965674143, "grad_norm": 0.25719960716201673, "learning_rate": 1.6533926272653345e-06, "loss": 0.1849, "step": 10271 }, { "epoch": 0.8199888241398579, "grad_norm": 0.33693869503322593, "learning_rate": 1.6519689267614337e-06, "loss": 0.1497, "step": 10272 }, { "epoch": 0.8200686517123015, "grad_norm": 0.28638006384497167, "learning_rate": 1.650545784290376e-06, "loss": 0.1709, "step": 10273 }, { "epoch": 0.820148479284745, "grad_norm": 0.27164274553387274, "learning_rate": 1.6491231999472912e-06, "loss": 0.1379, "step": 10274 }, { "epoch": 0.8202283068571885, "grad_norm": 0.2626625270734241, "learning_rate": 1.647701173827272e-06, "loss": 0.151, "step": 10275 }, { "epoch": 0.820308134429632, "grad_norm": 0.28016032546689457, "learning_rate": 1.6462797060253721e-06, "loss": 0.1756, "step": 10276 }, { "epoch": 0.8203879620020755, "grad_norm": 0.27478629524451836, "learning_rate": 1.6448587966366191e-06, "loss": 0.1579, "step": 10277 }, { "epoch": 0.820467789574519, "grad_norm": 0.3221558522888357, "learning_rate": 1.6434384457559892e-06, "loss": 0.1643, "step": 10278 }, { "epoch": 0.8205476171469626, "grad_norm": 0.23338490172238113, "learning_rate": 1.64201865347843e-06, "loss": 0.1693, "step": 10279 }, { "epoch": 0.8206274447194061, "grad_norm": 0.27988204183014576, "learning_rate": 1.6405994198988474e-06, "loss": 0.1885, "step": 10280 }, { "epoch": 0.8207072722918496, "grad_norm": 0.3054249578612152, "learning_rate": 1.6391807451121089e-06, "loss": 0.1555, "step": 10281 }, { "epoch": 0.8207870998642931, "grad_norm": 0.2773916900912008, "learning_rate": 1.6377626292130532e-06, "loss": 0.1654, "step": 10282 }, { "epoch": 0.8208669274367366, "grad_norm": 0.38323888602250494, "learning_rate": 1.6363450722964725e-06, "loss": 0.2009, "step": 10283 }, { "epoch": 0.8209467550091801, "grad_norm": 0.27453839844989747, "learning_rate": 1.634928074457125e-06, "loss": 0.1265, "step": 10284 }, { "epoch": 0.8210265825816236, "grad_norm": 0.3311430108677445, "learning_rate": 1.6335116357897319e-06, "loss": 0.1725, "step": 10285 }, { "epoch": 0.8211064101540673, "grad_norm": 0.30676338264920644, "learning_rate": 1.632095756388974e-06, "loss": 0.1781, "step": 10286 }, { "epoch": 0.8211862377265108, "grad_norm": 0.26066617521142366, "learning_rate": 1.6306804363495022e-06, "loss": 0.1782, "step": 10287 }, { "epoch": 0.8212660652989543, "grad_norm": 0.29291335716895933, "learning_rate": 1.629265675765922e-06, "loss": 0.1497, "step": 10288 }, { "epoch": 0.8213458928713978, "grad_norm": 0.27928887857604384, "learning_rate": 1.6278514747328045e-06, "loss": 0.1259, "step": 10289 }, { "epoch": 0.8214257204438413, "grad_norm": 0.23183793945723358, "learning_rate": 1.6264378333446818e-06, "loss": 0.1834, "step": 10290 }, { "epoch": 0.8215055480162848, "grad_norm": 0.29456572051780405, "learning_rate": 1.6250247516960538e-06, "loss": 0.161, "step": 10291 }, { "epoch": 0.8215853755887284, "grad_norm": 0.30752617852885417, "learning_rate": 1.6236122298813816e-06, "loss": 0.1626, "step": 10292 }, { "epoch": 0.8216652031611719, "grad_norm": 0.3012880026226417, "learning_rate": 1.6222002679950766e-06, "loss": 0.1561, "step": 10293 }, { "epoch": 0.8217450307336154, "grad_norm": 0.2720836367608581, "learning_rate": 1.620788866131533e-06, "loss": 0.1751, "step": 10294 }, { "epoch": 0.8218248583060589, "grad_norm": 0.2830079933306192, "learning_rate": 1.6193780243850898e-06, "loss": 0.141, "step": 10295 }, { "epoch": 0.8219046858785024, "grad_norm": 0.2803413151034813, "learning_rate": 1.6179677428500663e-06, "loss": 0.1913, "step": 10296 }, { "epoch": 0.8219845134509459, "grad_norm": 0.2717344866924826, "learning_rate": 1.6165580216207222e-06, "loss": 0.1897, "step": 10297 }, { "epoch": 0.8220643410233894, "grad_norm": 0.3432763858759969, "learning_rate": 1.6151488607913e-06, "loss": 0.1358, "step": 10298 }, { "epoch": 0.822144168595833, "grad_norm": 0.2779453215271378, "learning_rate": 1.6137402604559926e-06, "loss": 0.1857, "step": 10299 }, { "epoch": 0.8222239961682766, "grad_norm": 0.29402598044948675, "learning_rate": 1.6123322207089642e-06, "loss": 0.1569, "step": 10300 }, { "epoch": 0.8223038237407201, "grad_norm": 0.28409833295654563, "learning_rate": 1.6109247416443318e-06, "loss": 0.1545, "step": 10301 }, { "epoch": 0.8223836513131636, "grad_norm": 0.2692002632576205, "learning_rate": 1.6095178233561782e-06, "loss": 0.1358, "step": 10302 }, { "epoch": 0.8224634788856071, "grad_norm": 0.2908814282755603, "learning_rate": 1.6081114659385566e-06, "loss": 0.1623, "step": 10303 }, { "epoch": 0.8225433064580506, "grad_norm": 0.2654244299478513, "learning_rate": 1.606705669485471e-06, "loss": 0.11, "step": 10304 }, { "epoch": 0.8226231340304941, "grad_norm": 0.2817865494025532, "learning_rate": 1.6053004340908995e-06, "loss": 0.1643, "step": 10305 }, { "epoch": 0.8227029616029377, "grad_norm": 0.27263716264960847, "learning_rate": 1.603895759848768e-06, "loss": 0.1779, "step": 10306 }, { "epoch": 0.8227827891753812, "grad_norm": 0.27760039221952865, "learning_rate": 1.6024916468529815e-06, "loss": 0.1776, "step": 10307 }, { "epoch": 0.8228626167478247, "grad_norm": 0.30154552462827666, "learning_rate": 1.601088095197395e-06, "loss": 0.1224, "step": 10308 }, { "epoch": 0.8229424443202682, "grad_norm": 0.2808008974932907, "learning_rate": 1.5996851049758312e-06, "loss": 0.1718, "step": 10309 }, { "epoch": 0.8230222718927117, "grad_norm": 0.2653732812599879, "learning_rate": 1.5982826762820747e-06, "loss": 0.1821, "step": 10310 }, { "epoch": 0.8231020994651552, "grad_norm": 0.2900364280020105, "learning_rate": 1.5968808092098698e-06, "loss": 0.2056, "step": 10311 }, { "epoch": 0.8231819270375987, "grad_norm": 0.30537978118316, "learning_rate": 1.5954795038529303e-06, "loss": 0.1846, "step": 10312 }, { "epoch": 0.8232617546100424, "grad_norm": 0.3140196219076535, "learning_rate": 1.5940787603049257e-06, "loss": 0.1828, "step": 10313 }, { "epoch": 0.8233415821824859, "grad_norm": 0.29275492466939523, "learning_rate": 1.5926785786594912e-06, "loss": 0.2136, "step": 10314 }, { "epoch": 0.8234214097549294, "grad_norm": 0.30025166938184084, "learning_rate": 1.5912789590102206e-06, "loss": 0.1922, "step": 10315 }, { "epoch": 0.8235012373273729, "grad_norm": 0.3086932303964944, "learning_rate": 1.5898799014506739e-06, "loss": 0.1459, "step": 10316 }, { "epoch": 0.8235810648998164, "grad_norm": 0.2809385725452494, "learning_rate": 1.5884814060743748e-06, "loss": 0.1305, "step": 10317 }, { "epoch": 0.8236608924722599, "grad_norm": 0.2790661840712293, "learning_rate": 1.5870834729748064e-06, "loss": 0.1552, "step": 10318 }, { "epoch": 0.8237407200447034, "grad_norm": 0.29233158572436185, "learning_rate": 1.585686102245414e-06, "loss": 0.1638, "step": 10319 }, { "epoch": 0.823820547617147, "grad_norm": 0.3031417219643091, "learning_rate": 1.584289293979605e-06, "loss": 0.1762, "step": 10320 }, { "epoch": 0.8239003751895905, "grad_norm": 0.2946294844744036, "learning_rate": 1.5828930482707538e-06, "loss": 0.1106, "step": 10321 }, { "epoch": 0.823980202762034, "grad_norm": 0.2819022249709079, "learning_rate": 1.5814973652121922e-06, "loss": 0.1406, "step": 10322 }, { "epoch": 0.8240600303344775, "grad_norm": 0.25409258952529484, "learning_rate": 1.580102244897217e-06, "loss": 0.214, "step": 10323 }, { "epoch": 0.824139857906921, "grad_norm": 0.28389589278948835, "learning_rate": 1.5787076874190855e-06, "loss": 0.1736, "step": 10324 }, { "epoch": 0.8242196854793645, "grad_norm": 0.27759828069032666, "learning_rate": 1.5773136928710152e-06, "loss": 0.1475, "step": 10325 }, { "epoch": 0.824299513051808, "grad_norm": 0.27755810649623874, "learning_rate": 1.5759202613461954e-06, "loss": 0.1392, "step": 10326 }, { "epoch": 0.8243793406242517, "grad_norm": 0.2531846246969553, "learning_rate": 1.574527392937768e-06, "loss": 0.1615, "step": 10327 }, { "epoch": 0.8244591681966952, "grad_norm": 0.32773521504823344, "learning_rate": 1.5731350877388418e-06, "loss": 0.1766, "step": 10328 }, { "epoch": 0.8245389957691387, "grad_norm": 0.3039753243324986, "learning_rate": 1.5717433458424835e-06, "loss": 0.1962, "step": 10329 }, { "epoch": 0.8246188233415822, "grad_norm": 0.2647836773121734, "learning_rate": 1.570352167341731e-06, "loss": 0.1306, "step": 10330 }, { "epoch": 0.8246986509140257, "grad_norm": 0.33585725503869424, "learning_rate": 1.5689615523295765e-06, "loss": 0.152, "step": 10331 }, { "epoch": 0.8247784784864692, "grad_norm": 0.26338214603588816, "learning_rate": 1.5675715008989766e-06, "loss": 0.1455, "step": 10332 }, { "epoch": 0.8248583060589127, "grad_norm": 0.2309803665902176, "learning_rate": 1.5661820131428517e-06, "loss": 0.1465, "step": 10333 }, { "epoch": 0.8249381336313563, "grad_norm": 0.24617737267250317, "learning_rate": 1.5647930891540809e-06, "loss": 0.1354, "step": 10334 }, { "epoch": 0.8250179612037998, "grad_norm": 0.30744749336542637, "learning_rate": 1.5634047290255127e-06, "loss": 0.1518, "step": 10335 }, { "epoch": 0.8250977887762433, "grad_norm": 0.29108054970973185, "learning_rate": 1.5620169328499502e-06, "loss": 0.1925, "step": 10336 }, { "epoch": 0.8251776163486868, "grad_norm": 0.37204363907946003, "learning_rate": 1.5606297007201642e-06, "loss": 0.1658, "step": 10337 }, { "epoch": 0.8252574439211303, "grad_norm": 0.26913173472106583, "learning_rate": 1.5592430327288844e-06, "loss": 0.1671, "step": 10338 }, { "epoch": 0.8253372714935738, "grad_norm": 0.28437479514191705, "learning_rate": 1.5578569289688017e-06, "loss": 0.1793, "step": 10339 }, { "epoch": 0.8254170990660175, "grad_norm": 0.3060199102628697, "learning_rate": 1.5564713895325789e-06, "loss": 0.1627, "step": 10340 }, { "epoch": 0.825496926638461, "grad_norm": 0.298662783807429, "learning_rate": 1.5550864145128242e-06, "loss": 0.1365, "step": 10341 }, { "epoch": 0.8255767542109045, "grad_norm": 0.25289377177025646, "learning_rate": 1.5537020040021245e-06, "loss": 0.1281, "step": 10342 }, { "epoch": 0.825656581783348, "grad_norm": 0.30137659901066993, "learning_rate": 1.5523181580930192e-06, "loss": 0.1817, "step": 10343 }, { "epoch": 0.8257364093557915, "grad_norm": 0.24931380189044755, "learning_rate": 1.5509348768780176e-06, "loss": 0.1599, "step": 10344 }, { "epoch": 0.825816236928235, "grad_norm": 0.29433837020855863, "learning_rate": 1.5495521604495812e-06, "loss": 0.1459, "step": 10345 }, { "epoch": 0.8258960645006785, "grad_norm": 0.30111167716841425, "learning_rate": 1.5481700089001384e-06, "loss": 0.1984, "step": 10346 }, { "epoch": 0.825975892073122, "grad_norm": 0.24896437154915677, "learning_rate": 1.5467884223220842e-06, "loss": 0.1323, "step": 10347 }, { "epoch": 0.8260557196455656, "grad_norm": 0.28078745546077927, "learning_rate": 1.5454074008077702e-06, "loss": 0.1513, "step": 10348 }, { "epoch": 0.8261355472180091, "grad_norm": 0.3419180984749593, "learning_rate": 1.544026944449517e-06, "loss": 0.1727, "step": 10349 }, { "epoch": 0.8262153747904526, "grad_norm": 0.3045707771597523, "learning_rate": 1.5426470533395943e-06, "loss": 0.1979, "step": 10350 }, { "epoch": 0.8262952023628961, "grad_norm": 0.2787558344080366, "learning_rate": 1.5412677275702492e-06, "loss": 0.1144, "step": 10351 }, { "epoch": 0.8263750299353396, "grad_norm": 0.2762961232775627, "learning_rate": 1.5398889672336826e-06, "loss": 0.136, "step": 10352 }, { "epoch": 0.8264548575077831, "grad_norm": 0.26549450226555876, "learning_rate": 1.5385107724220571e-06, "loss": 0.1593, "step": 10353 }, { "epoch": 0.8265346850802268, "grad_norm": 0.2565845678641058, "learning_rate": 1.5371331432275005e-06, "loss": 0.1541, "step": 10354 }, { "epoch": 0.8266145126526703, "grad_norm": 0.2950518430484914, "learning_rate": 1.5357560797421012e-06, "loss": 0.1524, "step": 10355 }, { "epoch": 0.8266943402251138, "grad_norm": 0.28287305948208225, "learning_rate": 1.5343795820579122e-06, "loss": 0.1675, "step": 10356 }, { "epoch": 0.8267741677975573, "grad_norm": 0.310018924528122, "learning_rate": 1.5330036502669465e-06, "loss": 0.1634, "step": 10357 }, { "epoch": 0.8268539953700008, "grad_norm": 0.24827849069587588, "learning_rate": 1.5316282844611796e-06, "loss": 0.1253, "step": 10358 }, { "epoch": 0.8269338229424443, "grad_norm": 0.2686169140483175, "learning_rate": 1.5302534847325467e-06, "loss": 0.1302, "step": 10359 }, { "epoch": 0.8270136505148878, "grad_norm": 0.2629852909444996, "learning_rate": 1.5288792511729522e-06, "loss": 0.1153, "step": 10360 }, { "epoch": 0.8270934780873314, "grad_norm": 0.23449782585694137, "learning_rate": 1.527505583874257e-06, "loss": 0.1354, "step": 10361 }, { "epoch": 0.8271733056597749, "grad_norm": 0.27438445650302745, "learning_rate": 1.526132482928283e-06, "loss": 0.1519, "step": 10362 }, { "epoch": 0.8272531332322184, "grad_norm": 0.27580070445800875, "learning_rate": 1.5247599484268184e-06, "loss": 0.1354, "step": 10363 }, { "epoch": 0.8273329608046619, "grad_norm": 0.2824441121822821, "learning_rate": 1.523387980461608e-06, "loss": 0.1681, "step": 10364 }, { "epoch": 0.8274127883771054, "grad_norm": 0.294438061476912, "learning_rate": 1.522016579124368e-06, "loss": 0.1296, "step": 10365 }, { "epoch": 0.8274926159495489, "grad_norm": 0.29195229203099465, "learning_rate": 1.520645744506769e-06, "loss": 0.1312, "step": 10366 }, { "epoch": 0.8275724435219926, "grad_norm": 0.2572729775721642, "learning_rate": 1.5192754767004458e-06, "loss": 0.2095, "step": 10367 }, { "epoch": 0.8276522710944361, "grad_norm": 0.3008713634752503, "learning_rate": 1.5179057757969962e-06, "loss": 0.1804, "step": 10368 }, { "epoch": 0.8277320986668796, "grad_norm": 0.30317923096125277, "learning_rate": 1.5165366418879745e-06, "loss": 0.1231, "step": 10369 }, { "epoch": 0.8278119262393231, "grad_norm": 0.27988559125300455, "learning_rate": 1.5151680750649078e-06, "loss": 0.1784, "step": 10370 }, { "epoch": 0.8278917538117666, "grad_norm": 0.28495851539047945, "learning_rate": 1.5138000754192783e-06, "loss": 0.1346, "step": 10371 }, { "epoch": 0.8279715813842101, "grad_norm": 0.294481059473692, "learning_rate": 1.5124326430425296e-06, "loss": 0.1436, "step": 10372 }, { "epoch": 0.8280514089566536, "grad_norm": 0.27412195919712395, "learning_rate": 1.5110657780260674e-06, "loss": 0.2122, "step": 10373 }, { "epoch": 0.8281312365290971, "grad_norm": 0.29888497055402186, "learning_rate": 1.5096994804612663e-06, "loss": 0.1902, "step": 10374 }, { "epoch": 0.8282110641015407, "grad_norm": 0.287922128680323, "learning_rate": 1.508333750439458e-06, "loss": 0.1631, "step": 10375 }, { "epoch": 0.8282908916739842, "grad_norm": 0.3161088797445315, "learning_rate": 1.5069685880519281e-06, "loss": 0.1555, "step": 10376 }, { "epoch": 0.8283707192464277, "grad_norm": 0.28648443903615567, "learning_rate": 1.505603993389939e-06, "loss": 0.1439, "step": 10377 }, { "epoch": 0.8284505468188712, "grad_norm": 0.2951752479381353, "learning_rate": 1.504239966544706e-06, "loss": 0.1294, "step": 10378 }, { "epoch": 0.8285303743913147, "grad_norm": 0.3159423942700355, "learning_rate": 1.5028765076074125e-06, "loss": 0.1051, "step": 10379 }, { "epoch": 0.8286102019637582, "grad_norm": 0.2991691302782048, "learning_rate": 1.5015136166691969e-06, "loss": 0.1543, "step": 10380 }, { "epoch": 0.8286900295362019, "grad_norm": 0.2950522142413063, "learning_rate": 1.5001512938211638e-06, "loss": 0.1389, "step": 10381 }, { "epoch": 0.8287698571086454, "grad_norm": 0.289685748784656, "learning_rate": 1.4987895391543804e-06, "loss": 0.1351, "step": 10382 }, { "epoch": 0.8288496846810889, "grad_norm": 0.26726995990187125, "learning_rate": 1.4974283527598699e-06, "loss": 0.2283, "step": 10383 }, { "epoch": 0.8289295122535324, "grad_norm": 0.39423159463592083, "learning_rate": 1.4960677347286313e-06, "loss": 0.1597, "step": 10384 }, { "epoch": 0.8290093398259759, "grad_norm": 0.29692222246167693, "learning_rate": 1.4947076851516052e-06, "loss": 0.2001, "step": 10385 }, { "epoch": 0.8290891673984194, "grad_norm": 0.2902890193788599, "learning_rate": 1.4933482041197145e-06, "loss": 0.202, "step": 10386 }, { "epoch": 0.8291689949708629, "grad_norm": 0.30041124079214654, "learning_rate": 1.49198929172383e-06, "loss": 0.1601, "step": 10387 }, { "epoch": 0.8292488225433065, "grad_norm": 0.29534485125343807, "learning_rate": 1.4906309480547954e-06, "loss": 0.189, "step": 10388 }, { "epoch": 0.82932865011575, "grad_norm": 0.29752622447288424, "learning_rate": 1.4892731732034049e-06, "loss": 0.1538, "step": 10389 }, { "epoch": 0.8294084776881935, "grad_norm": 0.30612766740068886, "learning_rate": 1.4879159672604194e-06, "loss": 0.1518, "step": 10390 }, { "epoch": 0.829488305260637, "grad_norm": 0.30435858275999594, "learning_rate": 1.4865593303165681e-06, "loss": 0.175, "step": 10391 }, { "epoch": 0.8295681328330805, "grad_norm": 0.290420829020644, "learning_rate": 1.4852032624625346e-06, "loss": 0.1342, "step": 10392 }, { "epoch": 0.829647960405524, "grad_norm": 0.2905204318992446, "learning_rate": 1.483847763788966e-06, "loss": 0.1668, "step": 10393 }, { "epoch": 0.8297277879779675, "grad_norm": 0.27912060519806453, "learning_rate": 1.4824928343864698e-06, "loss": 0.1074, "step": 10394 }, { "epoch": 0.8298076155504112, "grad_norm": 0.3190356622371088, "learning_rate": 1.4811384743456237e-06, "loss": 0.1457, "step": 10395 }, { "epoch": 0.8298874431228547, "grad_norm": 0.31773182991296983, "learning_rate": 1.479784683756954e-06, "loss": 0.1323, "step": 10396 }, { "epoch": 0.8299672706952982, "grad_norm": 0.2589162901550025, "learning_rate": 1.4784314627109653e-06, "loss": 0.128, "step": 10397 }, { "epoch": 0.8300470982677417, "grad_norm": 0.2859782693917281, "learning_rate": 1.4770788112981083e-06, "loss": 0.2596, "step": 10398 }, { "epoch": 0.8301269258401852, "grad_norm": 0.3280384758086627, "learning_rate": 1.4757267296088007e-06, "loss": 0.1819, "step": 10399 }, { "epoch": 0.8302067534126287, "grad_norm": 0.3361773735617689, "learning_rate": 1.4743752177334304e-06, "loss": 0.1754, "step": 10400 }, { "epoch": 0.8302865809850722, "grad_norm": 0.25540904171327283, "learning_rate": 1.473024275762337e-06, "loss": 0.1546, "step": 10401 }, { "epoch": 0.8303664085575158, "grad_norm": 0.2597622018436281, "learning_rate": 1.4716739037858252e-06, "loss": 0.1024, "step": 10402 }, { "epoch": 0.8304462361299593, "grad_norm": 0.2824973676605054, "learning_rate": 1.4703241018941606e-06, "loss": 0.1924, "step": 10403 }, { "epoch": 0.8305260637024028, "grad_norm": 0.23720354256365325, "learning_rate": 1.4689748701775774e-06, "loss": 0.1293, "step": 10404 }, { "epoch": 0.8306058912748463, "grad_norm": 0.3021279773573356, "learning_rate": 1.4676262087262627e-06, "loss": 0.155, "step": 10405 }, { "epoch": 0.8306857188472898, "grad_norm": 0.29388048974392966, "learning_rate": 1.4662781176303697e-06, "loss": 0.137, "step": 10406 }, { "epoch": 0.8307655464197333, "grad_norm": 0.27773227201606154, "learning_rate": 1.464930596980013e-06, "loss": 0.1268, "step": 10407 }, { "epoch": 0.830845373992177, "grad_norm": 0.2840592162151052, "learning_rate": 1.4635836468652664e-06, "loss": 0.2085, "step": 10408 }, { "epoch": 0.8309252015646205, "grad_norm": 0.23582324088399215, "learning_rate": 1.4622372673761732e-06, "loss": 0.1356, "step": 10409 }, { "epoch": 0.831005029137064, "grad_norm": 0.2969879197180868, "learning_rate": 1.4608914586027313e-06, "loss": 0.187, "step": 10410 }, { "epoch": 0.8310848567095075, "grad_norm": 0.3110552239761721, "learning_rate": 1.4595462206349032e-06, "loss": 0.1954, "step": 10411 }, { "epoch": 0.831164684281951, "grad_norm": 0.24997503673765795, "learning_rate": 1.4582015535626114e-06, "loss": 0.1499, "step": 10412 }, { "epoch": 0.8312445118543945, "grad_norm": 0.3226525432572261, "learning_rate": 1.45685745747574e-06, "loss": 0.1831, "step": 10413 }, { "epoch": 0.831324339426838, "grad_norm": 0.29479030222459934, "learning_rate": 1.455513932464141e-06, "loss": 0.1891, "step": 10414 }, { "epoch": 0.8314041669992815, "grad_norm": 0.2981888271460772, "learning_rate": 1.4541709786176216e-06, "loss": 0.1394, "step": 10415 }, { "epoch": 0.8314839945717251, "grad_norm": 0.3083521854146873, "learning_rate": 1.4528285960259525e-06, "loss": 0.1546, "step": 10416 }, { "epoch": 0.8315638221441686, "grad_norm": 0.2709576627833531, "learning_rate": 1.451486784778865e-06, "loss": 0.137, "step": 10417 }, { "epoch": 0.8316436497166121, "grad_norm": 0.2909578190439776, "learning_rate": 1.4501455449660584e-06, "loss": 0.1407, "step": 10418 }, { "epoch": 0.8317234772890556, "grad_norm": 0.32133624793102705, "learning_rate": 1.448804876677189e-06, "loss": 0.1314, "step": 10419 }, { "epoch": 0.8318033048614991, "grad_norm": 0.26166227204100423, "learning_rate": 1.4474647800018682e-06, "loss": 0.1529, "step": 10420 }, { "epoch": 0.8318831324339426, "grad_norm": 0.2977745592949542, "learning_rate": 1.4461252550296822e-06, "loss": 0.1592, "step": 10421 }, { "epoch": 0.8319629600063863, "grad_norm": 0.2930152593185766, "learning_rate": 1.4447863018501707e-06, "loss": 0.1803, "step": 10422 }, { "epoch": 0.8320427875788298, "grad_norm": 0.27682624821486895, "learning_rate": 1.4434479205528406e-06, "loss": 0.1486, "step": 10423 }, { "epoch": 0.8321226151512733, "grad_norm": 0.2996295605106749, "learning_rate": 1.442110111227154e-06, "loss": 0.143, "step": 10424 }, { "epoch": 0.8322024427237168, "grad_norm": 0.24428228755471812, "learning_rate": 1.4407728739625405e-06, "loss": 0.1448, "step": 10425 }, { "epoch": 0.8322822702961603, "grad_norm": 0.29897901117680975, "learning_rate": 1.4394362088483848e-06, "loss": 0.1918, "step": 10426 }, { "epoch": 0.8323620978686038, "grad_norm": 0.2881827112268855, "learning_rate": 1.4381001159740427e-06, "loss": 0.1293, "step": 10427 }, { "epoch": 0.8324419254410473, "grad_norm": 0.24188912938340792, "learning_rate": 1.4367645954288277e-06, "loss": 0.125, "step": 10428 }, { "epoch": 0.8325217530134909, "grad_norm": 0.30611141451135765, "learning_rate": 1.4354296473020068e-06, "loss": 0.1567, "step": 10429 }, { "epoch": 0.8326015805859344, "grad_norm": 0.2625594375946321, "learning_rate": 1.4340952716828228e-06, "loss": 0.1462, "step": 10430 }, { "epoch": 0.8326814081583779, "grad_norm": 0.28867638151623937, "learning_rate": 1.4327614686604674e-06, "loss": 0.1721, "step": 10431 }, { "epoch": 0.8327612357308214, "grad_norm": 0.27222552242813225, "learning_rate": 1.4314282383241097e-06, "loss": 0.1737, "step": 10432 }, { "epoch": 0.8328410633032649, "grad_norm": 0.33844206779577446, "learning_rate": 1.4300955807628592e-06, "loss": 0.1661, "step": 10433 }, { "epoch": 0.8329208908757084, "grad_norm": 0.29286030490014253, "learning_rate": 1.428763496065807e-06, "loss": 0.1713, "step": 10434 }, { "epoch": 0.833000718448152, "grad_norm": 0.2537276136686119, "learning_rate": 1.4274319843219952e-06, "loss": 0.1644, "step": 10435 }, { "epoch": 0.8330805460205956, "grad_norm": 0.29037246073417305, "learning_rate": 1.4261010456204305e-06, "loss": 0.1653, "step": 10436 }, { "epoch": 0.8331603735930391, "grad_norm": 0.29070088191071186, "learning_rate": 1.4247706800500794e-06, "loss": 0.1455, "step": 10437 }, { "epoch": 0.8332402011654826, "grad_norm": 0.2905915910185337, "learning_rate": 1.4234408876998707e-06, "loss": 0.1321, "step": 10438 }, { "epoch": 0.8333200287379261, "grad_norm": 0.26620081756918157, "learning_rate": 1.4221116686587e-06, "loss": 0.1587, "step": 10439 }, { "epoch": 0.8333998563103696, "grad_norm": 0.31357615841879427, "learning_rate": 1.4207830230154174e-06, "loss": 0.1864, "step": 10440 }, { "epoch": 0.8334796838828131, "grad_norm": 0.3173146388133213, "learning_rate": 1.4194549508588384e-06, "loss": 0.1413, "step": 10441 }, { "epoch": 0.8335595114552566, "grad_norm": 0.2954089498187863, "learning_rate": 1.4181274522777399e-06, "loss": 0.1393, "step": 10442 }, { "epoch": 0.8336393390277002, "grad_norm": 0.2722777110042113, "learning_rate": 1.4168005273608553e-06, "loss": 0.2412, "step": 10443 }, { "epoch": 0.8337191666001437, "grad_norm": 0.3046497464174467, "learning_rate": 1.4154741761968915e-06, "loss": 0.1537, "step": 10444 }, { "epoch": 0.8337989941725872, "grad_norm": 0.28339176678036104, "learning_rate": 1.4141483988745064e-06, "loss": 0.164, "step": 10445 }, { "epoch": 0.8338788217450307, "grad_norm": 0.25303442373852597, "learning_rate": 1.4128231954823224e-06, "loss": 0.1716, "step": 10446 }, { "epoch": 0.8339586493174742, "grad_norm": 0.2603194324172515, "learning_rate": 1.4114985661089232e-06, "loss": 0.1582, "step": 10447 }, { "epoch": 0.8340384768899177, "grad_norm": 0.2881385733344686, "learning_rate": 1.4101745108428576e-06, "loss": 0.1807, "step": 10448 }, { "epoch": 0.8341183044623613, "grad_norm": 0.2810850879817351, "learning_rate": 1.4088510297726333e-06, "loss": 0.1427, "step": 10449 }, { "epoch": 0.8341981320348049, "grad_norm": 0.2832084322351372, "learning_rate": 1.4075281229867189e-06, "loss": 0.158, "step": 10450 }, { "epoch": 0.8342779596072484, "grad_norm": 0.3034370387953126, "learning_rate": 1.4062057905735449e-06, "loss": 0.1483, "step": 10451 }, { "epoch": 0.8343577871796919, "grad_norm": 0.2904080516967163, "learning_rate": 1.404884032621503e-06, "loss": 0.1599, "step": 10452 }, { "epoch": 0.8344376147521354, "grad_norm": 0.30552181820452784, "learning_rate": 1.4035628492189513e-06, "loss": 0.1601, "step": 10453 }, { "epoch": 0.8345174423245789, "grad_norm": 0.2937637550066575, "learning_rate": 1.4022422404542024e-06, "loss": 0.1661, "step": 10454 }, { "epoch": 0.8345972698970224, "grad_norm": 0.2590681937403041, "learning_rate": 1.4009222064155358e-06, "loss": 0.1296, "step": 10455 }, { "epoch": 0.834677097469466, "grad_norm": 0.27181299300344935, "learning_rate": 1.399602747191189e-06, "loss": 0.138, "step": 10456 }, { "epoch": 0.8347569250419095, "grad_norm": 0.30902096617853947, "learning_rate": 1.3982838628693607e-06, "loss": 0.1287, "step": 10457 }, { "epoch": 0.834836752614353, "grad_norm": 0.2952842132656844, "learning_rate": 1.3969655535382177e-06, "loss": 0.1459, "step": 10458 }, { "epoch": 0.8349165801867965, "grad_norm": 0.2985877964916431, "learning_rate": 1.3956478192858825e-06, "loss": 0.1915, "step": 10459 }, { "epoch": 0.83499640775924, "grad_norm": 0.2993406282725163, "learning_rate": 1.394330660200439e-06, "loss": 0.1638, "step": 10460 }, { "epoch": 0.8350762353316835, "grad_norm": 0.2780505672778605, "learning_rate": 1.3930140763699318e-06, "loss": 0.1533, "step": 10461 }, { "epoch": 0.8351560629041271, "grad_norm": 0.2765039848426346, "learning_rate": 1.3916980678823754e-06, "loss": 0.1801, "step": 10462 }, { "epoch": 0.8352358904765707, "grad_norm": 0.2977692443225475, "learning_rate": 1.3903826348257355e-06, "loss": 0.1972, "step": 10463 }, { "epoch": 0.8353157180490142, "grad_norm": 0.28410225165741193, "learning_rate": 1.3890677772879457e-06, "loss": 0.1301, "step": 10464 }, { "epoch": 0.8353955456214577, "grad_norm": 0.2930761641499128, "learning_rate": 1.3877534953568972e-06, "loss": 0.1247, "step": 10465 }, { "epoch": 0.8354753731939012, "grad_norm": 0.2366921520348563, "learning_rate": 1.3864397891204428e-06, "loss": 0.1551, "step": 10466 }, { "epoch": 0.8355552007663447, "grad_norm": 0.3111078792236709, "learning_rate": 1.3851266586664057e-06, "loss": 0.1458, "step": 10467 }, { "epoch": 0.8356350283387882, "grad_norm": 0.35405500750885294, "learning_rate": 1.3838141040825536e-06, "loss": 0.1952, "step": 10468 }, { "epoch": 0.8357148559112317, "grad_norm": 0.2587679563707171, "learning_rate": 1.382502125456634e-06, "loss": 0.1645, "step": 10469 }, { "epoch": 0.8357946834836752, "grad_norm": 0.3184498621692041, "learning_rate": 1.3811907228763422e-06, "loss": 0.211, "step": 10470 }, { "epoch": 0.8358745110561188, "grad_norm": 0.2661012326203137, "learning_rate": 1.3798798964293437e-06, "loss": 0.1927, "step": 10471 }, { "epoch": 0.8359543386285623, "grad_norm": 0.2582176683116748, "learning_rate": 1.3785696462032638e-06, "loss": 0.1206, "step": 10472 }, { "epoch": 0.8360341662010058, "grad_norm": 0.2915473933784621, "learning_rate": 1.3772599722856794e-06, "loss": 0.1473, "step": 10473 }, { "epoch": 0.8361139937734493, "grad_norm": 0.261614738445664, "learning_rate": 1.3759508747641437e-06, "loss": 0.1294, "step": 10474 }, { "epoch": 0.8361938213458928, "grad_norm": 0.2685210942745491, "learning_rate": 1.374642353726161e-06, "loss": 0.1539, "step": 10475 }, { "epoch": 0.8362736489183364, "grad_norm": 0.30814998620457135, "learning_rate": 1.3733344092592082e-06, "loss": 0.1315, "step": 10476 }, { "epoch": 0.83635347649078, "grad_norm": 0.30474966882132165, "learning_rate": 1.3720270414507053e-06, "loss": 0.1575, "step": 10477 }, { "epoch": 0.8364333040632235, "grad_norm": 0.2910187129642689, "learning_rate": 1.3707202503880534e-06, "loss": 0.1626, "step": 10478 }, { "epoch": 0.836513131635667, "grad_norm": 0.30478633524959997, "learning_rate": 1.3694140361586017e-06, "loss": 0.1873, "step": 10479 }, { "epoch": 0.8365929592081105, "grad_norm": 0.301731613597038, "learning_rate": 1.368108398849668e-06, "loss": 0.1479, "step": 10480 }, { "epoch": 0.836672786780554, "grad_norm": 0.2747494539094521, "learning_rate": 1.366803338548527e-06, "loss": 0.1406, "step": 10481 }, { "epoch": 0.8367526143529975, "grad_norm": 0.32076923165555205, "learning_rate": 1.3654988553424153e-06, "loss": 0.1482, "step": 10482 }, { "epoch": 0.836832441925441, "grad_norm": 0.2887861084231781, "learning_rate": 1.364194949318538e-06, "loss": 0.1805, "step": 10483 }, { "epoch": 0.8369122694978846, "grad_norm": 0.31737183408054653, "learning_rate": 1.362891620564052e-06, "loss": 0.1679, "step": 10484 }, { "epoch": 0.8369920970703281, "grad_norm": 0.31293347229857005, "learning_rate": 1.3615888691660805e-06, "loss": 0.1857, "step": 10485 }, { "epoch": 0.8370719246427716, "grad_norm": 0.32059877802232944, "learning_rate": 1.3602866952117077e-06, "loss": 0.14, "step": 10486 }, { "epoch": 0.8371517522152151, "grad_norm": 0.2863964223359028, "learning_rate": 1.358985098787976e-06, "loss": 0.1473, "step": 10487 }, { "epoch": 0.8372315797876586, "grad_norm": 0.2564206449156938, "learning_rate": 1.3576840799818968e-06, "loss": 0.1684, "step": 10488 }, { "epoch": 0.8373114073601022, "grad_norm": 0.2978781845834604, "learning_rate": 1.3563836388804353e-06, "loss": 0.1694, "step": 10489 }, { "epoch": 0.8373912349325457, "grad_norm": 0.251542943945513, "learning_rate": 1.3550837755705204e-06, "loss": 0.1447, "step": 10490 }, { "epoch": 0.8374710625049893, "grad_norm": 0.31149147852738085, "learning_rate": 1.3537844901390419e-06, "loss": 0.1368, "step": 10491 }, { "epoch": 0.8375508900774328, "grad_norm": 0.2995558992295539, "learning_rate": 1.3524857826728555e-06, "loss": 0.1622, "step": 10492 }, { "epoch": 0.8376307176498763, "grad_norm": 0.28176319575632897, "learning_rate": 1.351187653258773e-06, "loss": 0.1697, "step": 10493 }, { "epoch": 0.8377105452223198, "grad_norm": 0.25829656654291805, "learning_rate": 1.3498901019835687e-06, "loss": 0.1501, "step": 10494 }, { "epoch": 0.8377903727947633, "grad_norm": 0.2325133479695966, "learning_rate": 1.3485931289339793e-06, "loss": 0.1402, "step": 10495 }, { "epoch": 0.8378702003672068, "grad_norm": 0.27191680777854504, "learning_rate": 1.347296734196699e-06, "loss": 0.17, "step": 10496 }, { "epoch": 0.8379500279396503, "grad_norm": 0.2617782766006538, "learning_rate": 1.3460009178583922e-06, "loss": 0.1818, "step": 10497 }, { "epoch": 0.8380298555120939, "grad_norm": 0.2806027456825302, "learning_rate": 1.344705680005677e-06, "loss": 0.2062, "step": 10498 }, { "epoch": 0.8381096830845374, "grad_norm": 0.2710194795212844, "learning_rate": 1.3434110207251338e-06, "loss": 0.1663, "step": 10499 }, { "epoch": 0.8381895106569809, "grad_norm": 0.26520961777859237, "learning_rate": 1.342116940103304e-06, "loss": 0.174, "step": 10500 }, { "epoch": 0.8382693382294244, "grad_norm": 0.2618323893101125, "learning_rate": 1.3408234382266961e-06, "loss": 0.1356, "step": 10501 }, { "epoch": 0.8383491658018679, "grad_norm": 0.3003590829297775, "learning_rate": 1.3395305151817727e-06, "loss": 0.1502, "step": 10502 }, { "epoch": 0.8384289933743115, "grad_norm": 0.2598970588765996, "learning_rate": 1.3382381710549619e-06, "loss": 0.1526, "step": 10503 }, { "epoch": 0.838508820946755, "grad_norm": 0.31787764321535983, "learning_rate": 1.336946405932651e-06, "loss": 0.2058, "step": 10504 }, { "epoch": 0.8385886485191986, "grad_norm": 0.30574714860981067, "learning_rate": 1.3356552199011874e-06, "loss": 0.1592, "step": 10505 }, { "epoch": 0.8386684760916421, "grad_norm": 0.30820639586596665, "learning_rate": 1.3343646130468868e-06, "loss": 0.1516, "step": 10506 }, { "epoch": 0.8387483036640856, "grad_norm": 0.2846809148932668, "learning_rate": 1.3330745854560167e-06, "loss": 0.1562, "step": 10507 }, { "epoch": 0.8388281312365291, "grad_norm": 0.25050817515808427, "learning_rate": 1.3317851372148127e-06, "loss": 0.1403, "step": 10508 }, { "epoch": 0.8389079588089726, "grad_norm": 0.27950825001200563, "learning_rate": 1.330496268409469e-06, "loss": 0.1555, "step": 10509 }, { "epoch": 0.8389877863814161, "grad_norm": 0.29634949135191163, "learning_rate": 1.329207979126138e-06, "loss": 0.1677, "step": 10510 }, { "epoch": 0.8390676139538596, "grad_norm": 0.2887227209242177, "learning_rate": 1.327920269450944e-06, "loss": 0.1374, "step": 10511 }, { "epoch": 0.8391474415263032, "grad_norm": 0.28603954551295, "learning_rate": 1.3266331394699571e-06, "loss": 0.1895, "step": 10512 }, { "epoch": 0.8392272690987467, "grad_norm": 0.2971471602457364, "learning_rate": 1.325346589269222e-06, "loss": 0.1145, "step": 10513 }, { "epoch": 0.8393070966711902, "grad_norm": 0.2859298688403056, "learning_rate": 1.324060618934737e-06, "loss": 0.1716, "step": 10514 }, { "epoch": 0.8393869242436337, "grad_norm": 0.2595991595164113, "learning_rate": 1.3227752285524664e-06, "loss": 0.1689, "step": 10515 }, { "epoch": 0.8394667518160773, "grad_norm": 0.31860425692935396, "learning_rate": 1.3214904182083355e-06, "loss": 0.1428, "step": 10516 }, { "epoch": 0.8395465793885208, "grad_norm": 0.27431681505633787, "learning_rate": 1.32020618798822e-06, "loss": 0.1267, "step": 10517 }, { "epoch": 0.8396264069609644, "grad_norm": 0.3025092090584734, "learning_rate": 1.3189225379779746e-06, "loss": 0.1731, "step": 10518 }, { "epoch": 0.8397062345334079, "grad_norm": 0.2617823881247601, "learning_rate": 1.3176394682633996e-06, "loss": 0.2182, "step": 10519 }, { "epoch": 0.8397860621058514, "grad_norm": 0.2810096570250855, "learning_rate": 1.316356978930271e-06, "loss": 0.1725, "step": 10520 }, { "epoch": 0.8398658896782949, "grad_norm": 0.29372019095482477, "learning_rate": 1.31507507006431e-06, "loss": 0.1499, "step": 10521 }, { "epoch": 0.8399457172507384, "grad_norm": 0.29031935536780457, "learning_rate": 1.3137937417512126e-06, "loss": 0.1425, "step": 10522 }, { "epoch": 0.8400255448231819, "grad_norm": 0.2722131386000588, "learning_rate": 1.312512994076628e-06, "loss": 0.146, "step": 10523 }, { "epoch": 0.8401053723956254, "grad_norm": 0.31630585149130014, "learning_rate": 1.3112328271261698e-06, "loss": 0.1711, "step": 10524 }, { "epoch": 0.840185199968069, "grad_norm": 0.31428674956055264, "learning_rate": 1.309953240985412e-06, "loss": 0.1952, "step": 10525 }, { "epoch": 0.8402650275405125, "grad_norm": 0.29363004564631995, "learning_rate": 1.3086742357398885e-06, "loss": 0.1642, "step": 10526 }, { "epoch": 0.840344855112956, "grad_norm": 0.3258250433936387, "learning_rate": 1.3073958114750984e-06, "loss": 0.17, "step": 10527 }, { "epoch": 0.8404246826853995, "grad_norm": 0.3287589352267056, "learning_rate": 1.3061179682764979e-06, "loss": 0.0997, "step": 10528 }, { "epoch": 0.840504510257843, "grad_norm": 0.2725286409607727, "learning_rate": 1.3048407062295066e-06, "loss": 0.1815, "step": 10529 }, { "epoch": 0.8405843378302866, "grad_norm": 0.278280027521348, "learning_rate": 1.3035640254195004e-06, "loss": 0.1383, "step": 10530 }, { "epoch": 0.8406641654027301, "grad_norm": 0.26625745732272305, "learning_rate": 1.3022879259318266e-06, "loss": 0.1323, "step": 10531 }, { "epoch": 0.8407439929751737, "grad_norm": 0.3142165679973873, "learning_rate": 1.3010124078517839e-06, "loss": 0.1825, "step": 10532 }, { "epoch": 0.8408238205476172, "grad_norm": 0.2887230060511123, "learning_rate": 1.2997374712646371e-06, "loss": 0.1368, "step": 10533 }, { "epoch": 0.8409036481200607, "grad_norm": 0.2967539706401573, "learning_rate": 1.2984631162556093e-06, "loss": 0.1136, "step": 10534 }, { "epoch": 0.8409834756925042, "grad_norm": 0.289176938764736, "learning_rate": 1.2971893429098836e-06, "loss": 0.1633, "step": 10535 }, { "epoch": 0.8410633032649477, "grad_norm": 0.2531098563751136, "learning_rate": 1.2959161513126127e-06, "loss": 0.1511, "step": 10536 }, { "epoch": 0.8411431308373912, "grad_norm": 0.26827069458895797, "learning_rate": 1.2946435415489012e-06, "loss": 0.1818, "step": 10537 }, { "epoch": 0.8412229584098347, "grad_norm": 0.34411256584048217, "learning_rate": 1.2933715137038184e-06, "loss": 0.1572, "step": 10538 }, { "epoch": 0.8413027859822783, "grad_norm": 0.279143647434151, "learning_rate": 1.2921000678623928e-06, "loss": 0.1837, "step": 10539 }, { "epoch": 0.8413826135547218, "grad_norm": 0.30546424825706886, "learning_rate": 1.290829204109616e-06, "loss": 0.109, "step": 10540 }, { "epoch": 0.8414624411271653, "grad_norm": 0.29716332101420057, "learning_rate": 1.2895589225304427e-06, "loss": 0.131, "step": 10541 }, { "epoch": 0.8415422686996088, "grad_norm": 0.27963424652425667, "learning_rate": 1.2882892232097855e-06, "loss": 0.1717, "step": 10542 }, { "epoch": 0.8416220962720524, "grad_norm": 0.318887019086407, "learning_rate": 1.287020106232516e-06, "loss": 0.2185, "step": 10543 }, { "epoch": 0.8417019238444959, "grad_norm": 0.31471492345753677, "learning_rate": 1.2857515716834711e-06, "loss": 0.1403, "step": 10544 }, { "epoch": 0.8417817514169394, "grad_norm": 0.30881736254971687, "learning_rate": 1.2844836196474486e-06, "loss": 0.1814, "step": 10545 }, { "epoch": 0.841861578989383, "grad_norm": 0.31138580081091194, "learning_rate": 1.283216250209206e-06, "loss": 0.1385, "step": 10546 }, { "epoch": 0.8419414065618265, "grad_norm": 0.2672431773221727, "learning_rate": 1.28194946345346e-06, "loss": 0.1851, "step": 10547 }, { "epoch": 0.84202123413427, "grad_norm": 0.26391610015747324, "learning_rate": 1.2806832594648921e-06, "loss": 0.1289, "step": 10548 }, { "epoch": 0.8421010617067135, "grad_norm": 0.2484349736686759, "learning_rate": 1.2794176383281397e-06, "loss": 0.1202, "step": 10549 }, { "epoch": 0.842180889279157, "grad_norm": 0.2723508858371663, "learning_rate": 1.27815260012781e-06, "loss": 0.1339, "step": 10550 }, { "epoch": 0.8422607168516005, "grad_norm": 0.3150642242617111, "learning_rate": 1.2768881449484628e-06, "loss": 0.1576, "step": 10551 }, { "epoch": 0.842340544424044, "grad_norm": 0.28979110226179156, "learning_rate": 1.2756242728746215e-06, "loss": 0.1482, "step": 10552 }, { "epoch": 0.8424203719964876, "grad_norm": 0.28839628526254213, "learning_rate": 1.2743609839907722e-06, "loss": 0.1568, "step": 10553 }, { "epoch": 0.8425001995689311, "grad_norm": 0.30701154241473816, "learning_rate": 1.2730982783813571e-06, "loss": 0.1433, "step": 10554 }, { "epoch": 0.8425800271413746, "grad_norm": 0.30439088035682155, "learning_rate": 1.2718361561307913e-06, "loss": 0.1777, "step": 10555 }, { "epoch": 0.8426598547138181, "grad_norm": 0.23597864147698852, "learning_rate": 1.2705746173234324e-06, "loss": 0.1591, "step": 10556 }, { "epoch": 0.8427396822862617, "grad_norm": 0.31251380597866735, "learning_rate": 1.2693136620436163e-06, "loss": 0.1795, "step": 10557 }, { "epoch": 0.8428195098587052, "grad_norm": 0.30039803007780075, "learning_rate": 1.2680532903756292e-06, "loss": 0.1277, "step": 10558 }, { "epoch": 0.8428993374311488, "grad_norm": 0.2685329646986599, "learning_rate": 1.2667935024037281e-06, "loss": 0.1334, "step": 10559 }, { "epoch": 0.8429791650035923, "grad_norm": 0.32481453838830904, "learning_rate": 1.2655342982121166e-06, "loss": 0.1708, "step": 10560 }, { "epoch": 0.8430589925760358, "grad_norm": 0.31903061718480247, "learning_rate": 1.264275677884973e-06, "loss": 0.1296, "step": 10561 }, { "epoch": 0.8431388201484793, "grad_norm": 0.2998322919764358, "learning_rate": 1.2630176415064289e-06, "loss": 0.1801, "step": 10562 }, { "epoch": 0.8432186477209228, "grad_norm": 0.31095688463939364, "learning_rate": 1.261760189160577e-06, "loss": 0.1591, "step": 10563 }, { "epoch": 0.8432984752933663, "grad_norm": 0.2734242150669335, "learning_rate": 1.2605033209314809e-06, "loss": 0.156, "step": 10564 }, { "epoch": 0.8433783028658098, "grad_norm": 0.2854363180689398, "learning_rate": 1.2592470369031473e-06, "loss": 0.1531, "step": 10565 }, { "epoch": 0.8434581304382534, "grad_norm": 0.31154773654948664, "learning_rate": 1.2579913371595597e-06, "loss": 0.1752, "step": 10566 }, { "epoch": 0.8435379580106969, "grad_norm": 0.3047974574843739, "learning_rate": 1.2567362217846547e-06, "loss": 0.1619, "step": 10567 }, { "epoch": 0.8436177855831404, "grad_norm": 0.3035330048774117, "learning_rate": 1.2554816908623358e-06, "loss": 0.1772, "step": 10568 }, { "epoch": 0.8436976131555839, "grad_norm": 0.29983918327913656, "learning_rate": 1.2542277444764584e-06, "loss": 0.1777, "step": 10569 }, { "epoch": 0.8437774407280274, "grad_norm": 0.36989827349227616, "learning_rate": 1.2529743827108431e-06, "loss": 0.1863, "step": 10570 }, { "epoch": 0.843857268300471, "grad_norm": 0.24674403579747067, "learning_rate": 1.2517216056492765e-06, "loss": 0.1322, "step": 10571 }, { "epoch": 0.8439370958729145, "grad_norm": 0.30621912336316714, "learning_rate": 1.2504694133755014e-06, "loss": 0.15, "step": 10572 }, { "epoch": 0.8440169234453581, "grad_norm": 0.3013523161240402, "learning_rate": 1.2492178059732186e-06, "loss": 0.2122, "step": 10573 }, { "epoch": 0.8440967510178016, "grad_norm": 0.2510972453363246, "learning_rate": 1.2479667835260933e-06, "loss": 0.1262, "step": 10574 }, { "epoch": 0.8441765785902451, "grad_norm": 0.29752498855247783, "learning_rate": 1.2467163461177556e-06, "loss": 0.1949, "step": 10575 }, { "epoch": 0.8442564061626886, "grad_norm": 0.24245326160810068, "learning_rate": 1.2454664938317896e-06, "loss": 0.1388, "step": 10576 }, { "epoch": 0.8443362337351321, "grad_norm": 0.2861965610497349, "learning_rate": 1.2442172267517438e-06, "loss": 0.1614, "step": 10577 }, { "epoch": 0.8444160613075756, "grad_norm": 0.2839527861362047, "learning_rate": 1.2429685449611251e-06, "loss": 0.1482, "step": 10578 }, { "epoch": 0.8444958888800191, "grad_norm": 0.2912843190756323, "learning_rate": 1.241720448543402e-06, "loss": 0.1632, "step": 10579 }, { "epoch": 0.8445757164524627, "grad_norm": 0.26492116842189384, "learning_rate": 1.240472937582009e-06, "loss": 0.148, "step": 10580 }, { "epoch": 0.8446555440249062, "grad_norm": 0.27138324536253233, "learning_rate": 1.239226012160335e-06, "loss": 0.1619, "step": 10581 }, { "epoch": 0.8447353715973497, "grad_norm": 0.30664175574532193, "learning_rate": 1.237979672361731e-06, "loss": 0.1881, "step": 10582 }, { "epoch": 0.8448151991697932, "grad_norm": 0.2627216357348578, "learning_rate": 1.2367339182695127e-06, "loss": 0.1822, "step": 10583 }, { "epoch": 0.8448950267422368, "grad_norm": 0.271467238305478, "learning_rate": 1.2354887499669487e-06, "loss": 0.1512, "step": 10584 }, { "epoch": 0.8449748543146803, "grad_norm": 0.35146771413645306, "learning_rate": 1.2342441675372785e-06, "loss": 0.1658, "step": 10585 }, { "epoch": 0.8450546818871238, "grad_norm": 0.30599807427766346, "learning_rate": 1.2330001710636975e-06, "loss": 0.1464, "step": 10586 }, { "epoch": 0.8451345094595674, "grad_norm": 0.2900093290361821, "learning_rate": 1.231756760629359e-06, "loss": 0.1931, "step": 10587 }, { "epoch": 0.8452143370320109, "grad_norm": 0.34433355651452546, "learning_rate": 1.2305139363173801e-06, "loss": 0.1302, "step": 10588 }, { "epoch": 0.8452941646044544, "grad_norm": 0.2925899346389716, "learning_rate": 1.2292716982108422e-06, "loss": 0.1474, "step": 10589 }, { "epoch": 0.8453739921768979, "grad_norm": 0.2699009944039128, "learning_rate": 1.2280300463927807e-06, "loss": 0.1857, "step": 10590 }, { "epoch": 0.8454538197493414, "grad_norm": 0.33188119155568424, "learning_rate": 1.2267889809461975e-06, "loss": 0.2026, "step": 10591 }, { "epoch": 0.8455336473217849, "grad_norm": 0.28361409844158, "learning_rate": 1.2255485019540503e-06, "loss": 0.104, "step": 10592 }, { "epoch": 0.8456134748942284, "grad_norm": 0.3351219895027634, "learning_rate": 1.2243086094992608e-06, "loss": 0.1301, "step": 10593 }, { "epoch": 0.845693302466672, "grad_norm": 0.2801531474824494, "learning_rate": 1.2230693036647124e-06, "loss": 0.1512, "step": 10594 }, { "epoch": 0.8457731300391155, "grad_norm": 0.30588696824584094, "learning_rate": 1.221830584533248e-06, "loss": 0.142, "step": 10595 }, { "epoch": 0.845852957611559, "grad_norm": 0.2984945032084911, "learning_rate": 1.22059245218767e-06, "loss": 0.2072, "step": 10596 }, { "epoch": 0.8459327851840025, "grad_norm": 0.2679552758483169, "learning_rate": 1.2193549067107403e-06, "loss": 0.1413, "step": 10597 }, { "epoch": 0.8460126127564461, "grad_norm": 0.3009618898734433, "learning_rate": 1.218117948185189e-06, "loss": 0.1929, "step": 10598 }, { "epoch": 0.8460924403288896, "grad_norm": 0.2685525844436873, "learning_rate": 1.2168815766937003e-06, "loss": 0.177, "step": 10599 }, { "epoch": 0.8461722679013332, "grad_norm": 0.2517708795898776, "learning_rate": 1.215645792318917e-06, "loss": 0.1439, "step": 10600 }, { "epoch": 0.8462520954737767, "grad_norm": 0.27960213328201194, "learning_rate": 1.2144105951434492e-06, "loss": 0.1202, "step": 10601 }, { "epoch": 0.8463319230462202, "grad_norm": 0.31192969617768534, "learning_rate": 1.2131759852498649e-06, "loss": 0.182, "step": 10602 }, { "epoch": 0.8464117506186637, "grad_norm": 0.28175097750811623, "learning_rate": 1.2119419627206952e-06, "loss": 0.149, "step": 10603 }, { "epoch": 0.8464915781911072, "grad_norm": 0.2649816286026171, "learning_rate": 1.2107085276384244e-06, "loss": 0.1772, "step": 10604 }, { "epoch": 0.8465714057635507, "grad_norm": 0.2596172233829722, "learning_rate": 1.2094756800855068e-06, "loss": 0.1743, "step": 10605 }, { "epoch": 0.8466512333359942, "grad_norm": 0.2655949218794201, "learning_rate": 1.2082434201443527e-06, "loss": 0.149, "step": 10606 }, { "epoch": 0.8467310609084377, "grad_norm": 0.30718742254145387, "learning_rate": 1.2070117478973308e-06, "loss": 0.1754, "step": 10607 }, { "epoch": 0.8468108884808813, "grad_norm": 0.32877219638617844, "learning_rate": 1.2057806634267799e-06, "loss": 0.1524, "step": 10608 }, { "epoch": 0.8468907160533248, "grad_norm": 0.2618447731788313, "learning_rate": 1.2045501668149862e-06, "loss": 0.1549, "step": 10609 }, { "epoch": 0.8469705436257683, "grad_norm": 0.2652033452265624, "learning_rate": 1.2033202581442071e-06, "loss": 0.1703, "step": 10610 }, { "epoch": 0.8470503711982119, "grad_norm": 0.2949421220766819, "learning_rate": 1.202090937496656e-06, "loss": 0.1579, "step": 10611 }, { "epoch": 0.8471301987706554, "grad_norm": 0.2937765125678533, "learning_rate": 1.2008622049545116e-06, "loss": 0.1513, "step": 10612 }, { "epoch": 0.8472100263430989, "grad_norm": 0.2508390332030473, "learning_rate": 1.1996340605999058e-06, "loss": 0.1212, "step": 10613 }, { "epoch": 0.8472898539155425, "grad_norm": 0.3560079700801508, "learning_rate": 1.1984065045149341e-06, "loss": 0.2147, "step": 10614 }, { "epoch": 0.847369681487986, "grad_norm": 0.2920825460751511, "learning_rate": 1.1971795367816575e-06, "loss": 0.1946, "step": 10615 }, { "epoch": 0.8474495090604295, "grad_norm": 0.26333479549044275, "learning_rate": 1.1959531574820927e-06, "loss": 0.1529, "step": 10616 }, { "epoch": 0.847529336632873, "grad_norm": 0.2805799952694243, "learning_rate": 1.1947273666982172e-06, "loss": 0.1378, "step": 10617 }, { "epoch": 0.8476091642053165, "grad_norm": 0.3596688139221306, "learning_rate": 1.1935021645119703e-06, "loss": 0.1522, "step": 10618 }, { "epoch": 0.84768899177776, "grad_norm": 0.2827061629084719, "learning_rate": 1.1922775510052541e-06, "loss": 0.2046, "step": 10619 }, { "epoch": 0.8477688193502035, "grad_norm": 0.2665628205124773, "learning_rate": 1.1910535262599276e-06, "loss": 0.1763, "step": 10620 }, { "epoch": 0.847848646922647, "grad_norm": 0.32541105847204893, "learning_rate": 1.189830090357812e-06, "loss": 0.134, "step": 10621 }, { "epoch": 0.8479284744950906, "grad_norm": 0.28296146609708017, "learning_rate": 1.1886072433806895e-06, "loss": 0.1364, "step": 10622 }, { "epoch": 0.8480083020675341, "grad_norm": 0.3043922834205968, "learning_rate": 1.1873849854103003e-06, "loss": 0.1485, "step": 10623 }, { "epoch": 0.8480881296399776, "grad_norm": 0.29949528556921473, "learning_rate": 1.1861633165283515e-06, "loss": 0.1771, "step": 10624 }, { "epoch": 0.8481679572124212, "grad_norm": 0.2854835589517272, "learning_rate": 1.1849422368165054e-06, "loss": 0.2031, "step": 10625 }, { "epoch": 0.8482477847848647, "grad_norm": 0.35172184746471935, "learning_rate": 1.1837217463563854e-06, "loss": 0.1907, "step": 10626 }, { "epoch": 0.8483276123573082, "grad_norm": 0.28017303592592996, "learning_rate": 1.1825018452295755e-06, "loss": 0.1572, "step": 10627 }, { "epoch": 0.8484074399297518, "grad_norm": 0.2746281843901558, "learning_rate": 1.1812825335176236e-06, "loss": 0.164, "step": 10628 }, { "epoch": 0.8484872675021953, "grad_norm": 0.289895037180125, "learning_rate": 1.1800638113020357e-06, "loss": 0.2182, "step": 10629 }, { "epoch": 0.8485670950746388, "grad_norm": 0.3019712868762915, "learning_rate": 1.1788456786642787e-06, "loss": 0.144, "step": 10630 }, { "epoch": 0.8486469226470823, "grad_norm": 0.2969594102351223, "learning_rate": 1.1776281356857776e-06, "loss": 0.1407, "step": 10631 }, { "epoch": 0.8487267502195258, "grad_norm": 0.2553123664380435, "learning_rate": 1.1764111824479207e-06, "loss": 0.1925, "step": 10632 }, { "epoch": 0.8488065777919693, "grad_norm": 0.27453363230506345, "learning_rate": 1.1751948190320584e-06, "loss": 0.1362, "step": 10633 }, { "epoch": 0.8488864053644128, "grad_norm": 0.23990377923562228, "learning_rate": 1.1739790455195e-06, "loss": 0.1646, "step": 10634 }, { "epoch": 0.8489662329368564, "grad_norm": 0.21186111682083686, "learning_rate": 1.1727638619915138e-06, "loss": 0.1017, "step": 10635 }, { "epoch": 0.8490460605092999, "grad_norm": 0.3007799495268545, "learning_rate": 1.1715492685293305e-06, "loss": 0.1529, "step": 10636 }, { "epoch": 0.8491258880817434, "grad_norm": 0.24377735085071964, "learning_rate": 1.1703352652141386e-06, "loss": 0.1436, "step": 10637 }, { "epoch": 0.849205715654187, "grad_norm": 0.2881142523172398, "learning_rate": 1.169121852127093e-06, "loss": 0.1811, "step": 10638 }, { "epoch": 0.8492855432266305, "grad_norm": 0.2969418566221329, "learning_rate": 1.1679090293493044e-06, "loss": 0.1826, "step": 10639 }, { "epoch": 0.849365370799074, "grad_norm": 0.2878574396698544, "learning_rate": 1.166696796961846e-06, "loss": 0.1699, "step": 10640 }, { "epoch": 0.8494451983715176, "grad_norm": 0.29226005939956134, "learning_rate": 1.1654851550457459e-06, "loss": 0.1472, "step": 10641 }, { "epoch": 0.8495250259439611, "grad_norm": 0.2907910267818719, "learning_rate": 1.1642741036820038e-06, "loss": 0.2002, "step": 10642 }, { "epoch": 0.8496048535164046, "grad_norm": 0.290080692356526, "learning_rate": 1.1630636429515741e-06, "loss": 0.2044, "step": 10643 }, { "epoch": 0.8496846810888481, "grad_norm": 0.2637177214769222, "learning_rate": 1.1618537729353629e-06, "loss": 0.1553, "step": 10644 }, { "epoch": 0.8497645086612916, "grad_norm": 0.3204257874447126, "learning_rate": 1.1606444937142536e-06, "loss": 0.1711, "step": 10645 }, { "epoch": 0.8498443362337351, "grad_norm": 0.2879580667585121, "learning_rate": 1.1594358053690769e-06, "loss": 0.199, "step": 10646 }, { "epoch": 0.8499241638061786, "grad_norm": 0.2884936576881634, "learning_rate": 1.1582277079806348e-06, "loss": 0.1781, "step": 10647 }, { "epoch": 0.8500039913786221, "grad_norm": 0.31990502740246485, "learning_rate": 1.157020201629675e-06, "loss": 0.1904, "step": 10648 }, { "epoch": 0.8500838189510657, "grad_norm": 0.2816103854364381, "learning_rate": 1.155813286396922e-06, "loss": 0.1696, "step": 10649 }, { "epoch": 0.8501636465235092, "grad_norm": 0.32117135793899293, "learning_rate": 1.1546069623630508e-06, "loss": 0.2111, "step": 10650 }, { "epoch": 0.8502434740959527, "grad_norm": 0.3135670389053723, "learning_rate": 1.1534012296086994e-06, "loss": 0.1933, "step": 10651 }, { "epoch": 0.8503233016683963, "grad_norm": 0.2883128632677458, "learning_rate": 1.1521960882144655e-06, "loss": 0.1829, "step": 10652 }, { "epoch": 0.8504031292408398, "grad_norm": 0.25544321824793337, "learning_rate": 1.1509915382609061e-06, "loss": 0.152, "step": 10653 }, { "epoch": 0.8504829568132833, "grad_norm": 0.2938442852849109, "learning_rate": 1.1497875798285462e-06, "loss": 0.1321, "step": 10654 }, { "epoch": 0.8505627843857269, "grad_norm": 0.2781191197880186, "learning_rate": 1.1485842129978597e-06, "loss": 0.1495, "step": 10655 }, { "epoch": 0.8506426119581704, "grad_norm": 0.2795364874350363, "learning_rate": 1.1473814378492942e-06, "loss": 0.1299, "step": 10656 }, { "epoch": 0.8507224395306139, "grad_norm": 0.2659733169258686, "learning_rate": 1.1461792544632422e-06, "loss": 0.1419, "step": 10657 }, { "epoch": 0.8508022671030574, "grad_norm": 0.31243477550865834, "learning_rate": 1.1449776629200704e-06, "loss": 0.1822, "step": 10658 }, { "epoch": 0.8508820946755009, "grad_norm": 0.263003228269407, "learning_rate": 1.143776663300099e-06, "loss": 0.1308, "step": 10659 }, { "epoch": 0.8509619222479444, "grad_norm": 0.27401239960574303, "learning_rate": 1.1425762556836096e-06, "loss": 0.1696, "step": 10660 }, { "epoch": 0.8510417498203879, "grad_norm": 0.27440265372566874, "learning_rate": 1.1413764401508442e-06, "loss": 0.151, "step": 10661 }, { "epoch": 0.8511215773928315, "grad_norm": 0.2530411077780612, "learning_rate": 1.1401772167820059e-06, "loss": 0.1664, "step": 10662 }, { "epoch": 0.851201404965275, "grad_norm": 0.32165397930039763, "learning_rate": 1.13897858565726e-06, "loss": 0.1559, "step": 10663 }, { "epoch": 0.8512812325377185, "grad_norm": 0.2534617528453799, "learning_rate": 1.1377805468567282e-06, "loss": 0.1865, "step": 10664 }, { "epoch": 0.8513610601101621, "grad_norm": 0.2401782220445853, "learning_rate": 1.1365831004604966e-06, "loss": 0.1759, "step": 10665 }, { "epoch": 0.8514408876826056, "grad_norm": 0.3073250410169662, "learning_rate": 1.1353862465486076e-06, "loss": 0.1598, "step": 10666 }, { "epoch": 0.8515207152550491, "grad_norm": 0.2628800991600858, "learning_rate": 1.1341899852010652e-06, "loss": 0.1479, "step": 10667 }, { "epoch": 0.8516005428274926, "grad_norm": 0.30299945618726193, "learning_rate": 1.1329943164978384e-06, "loss": 0.194, "step": 10668 }, { "epoch": 0.8516803703999362, "grad_norm": 0.27250577098761153, "learning_rate": 1.1317992405188516e-06, "loss": 0.1753, "step": 10669 }, { "epoch": 0.8517601979723797, "grad_norm": 0.2425593207643103, "learning_rate": 1.1306047573439894e-06, "loss": 0.1817, "step": 10670 }, { "epoch": 0.8518400255448232, "grad_norm": 0.2815048131214058, "learning_rate": 1.1294108670530978e-06, "loss": 0.1686, "step": 10671 }, { "epoch": 0.8519198531172667, "grad_norm": 0.2722024529832282, "learning_rate": 1.1282175697259868e-06, "loss": 0.1069, "step": 10672 }, { "epoch": 0.8519996806897102, "grad_norm": 0.2723858393992756, "learning_rate": 1.127024865442422e-06, "loss": 0.1568, "step": 10673 }, { "epoch": 0.8520795082621537, "grad_norm": 0.2561920256256327, "learning_rate": 1.1258327542821313e-06, "loss": 0.1736, "step": 10674 }, { "epoch": 0.8521593358345972, "grad_norm": 0.30804845400661657, "learning_rate": 1.1246412363248016e-06, "loss": 0.1666, "step": 10675 }, { "epoch": 0.8522391634070408, "grad_norm": 0.26180379149505506, "learning_rate": 1.1234503116500806e-06, "loss": 0.1612, "step": 10676 }, { "epoch": 0.8523189909794843, "grad_norm": 0.3912252826693106, "learning_rate": 1.122259980337579e-06, "loss": 0.2266, "step": 10677 }, { "epoch": 0.8523988185519278, "grad_norm": 0.27158759480434175, "learning_rate": 1.121070242466865e-06, "loss": 0.1949, "step": 10678 }, { "epoch": 0.8524786461243714, "grad_norm": 0.2683628321088903, "learning_rate": 1.119881098117468e-06, "loss": 0.1365, "step": 10679 }, { "epoch": 0.8525584736968149, "grad_norm": 0.2716153052989562, "learning_rate": 1.118692547368877e-06, "loss": 0.1426, "step": 10680 }, { "epoch": 0.8526383012692584, "grad_norm": 0.25341889760469966, "learning_rate": 1.11750459030054e-06, "loss": 0.1486, "step": 10681 }, { "epoch": 0.852718128841702, "grad_norm": 0.26804174415293774, "learning_rate": 1.116317226991871e-06, "loss": 0.2257, "step": 10682 }, { "epoch": 0.8527979564141455, "grad_norm": 0.29611085043691787, "learning_rate": 1.1151304575222388e-06, "loss": 0.2105, "step": 10683 }, { "epoch": 0.852877783986589, "grad_norm": 0.2857519994403502, "learning_rate": 1.1139442819709755e-06, "loss": 0.1497, "step": 10684 }, { "epoch": 0.8529576115590325, "grad_norm": 0.2874916232004802, "learning_rate": 1.112758700417368e-06, "loss": 0.16, "step": 10685 }, { "epoch": 0.853037439131476, "grad_norm": 0.27890202189899405, "learning_rate": 1.1115737129406733e-06, "loss": 0.1816, "step": 10686 }, { "epoch": 0.8531172667039195, "grad_norm": 0.29873472032362863, "learning_rate": 1.1103893196201021e-06, "loss": 0.149, "step": 10687 }, { "epoch": 0.853197094276363, "grad_norm": 0.26438801036751797, "learning_rate": 1.1092055205348217e-06, "loss": 0.2204, "step": 10688 }, { "epoch": 0.8532769218488065, "grad_norm": 0.2612676621512892, "learning_rate": 1.108022315763969e-06, "loss": 0.2188, "step": 10689 }, { "epoch": 0.8533567494212501, "grad_norm": 0.2639422207571614, "learning_rate": 1.1068397053866342e-06, "loss": 0.1449, "step": 10690 }, { "epoch": 0.8534365769936936, "grad_norm": 0.29803497286310654, "learning_rate": 1.1056576894818739e-06, "loss": 0.1845, "step": 10691 }, { "epoch": 0.8535164045661372, "grad_norm": 0.2848758266047013, "learning_rate": 1.1044762681286947e-06, "loss": 0.203, "step": 10692 }, { "epoch": 0.8535962321385807, "grad_norm": 0.3142947150147743, "learning_rate": 1.1032954414060758e-06, "loss": 0.1806, "step": 10693 }, { "epoch": 0.8536760597110242, "grad_norm": 0.2688153721930004, "learning_rate": 1.1021152093929466e-06, "loss": 0.1416, "step": 10694 }, { "epoch": 0.8537558872834677, "grad_norm": 0.3410316651147141, "learning_rate": 1.100935572168207e-06, "loss": 0.1808, "step": 10695 }, { "epoch": 0.8538357148559113, "grad_norm": 0.23735615350517322, "learning_rate": 1.099756529810705e-06, "loss": 0.203, "step": 10696 }, { "epoch": 0.8539155424283548, "grad_norm": 0.3292174967898124, "learning_rate": 1.0985780823992554e-06, "loss": 0.154, "step": 10697 }, { "epoch": 0.8539953700007983, "grad_norm": 0.33598118916555597, "learning_rate": 1.0974002300126353e-06, "loss": 0.1362, "step": 10698 }, { "epoch": 0.8540751975732418, "grad_norm": 0.3341142137774157, "learning_rate": 1.096222972729577e-06, "loss": 0.1633, "step": 10699 }, { "epoch": 0.8541550251456853, "grad_norm": 0.3047750502911185, "learning_rate": 1.095046310628781e-06, "loss": 0.1633, "step": 10700 }, { "epoch": 0.8542348527181288, "grad_norm": 0.311181739781703, "learning_rate": 1.093870243788895e-06, "loss": 0.1553, "step": 10701 }, { "epoch": 0.8543146802905723, "grad_norm": 0.30787449433256103, "learning_rate": 1.0926947722885395e-06, "loss": 0.1498, "step": 10702 }, { "epoch": 0.8543945078630159, "grad_norm": 0.2721556643712972, "learning_rate": 1.091519896206288e-06, "loss": 0.1496, "step": 10703 }, { "epoch": 0.8544743354354594, "grad_norm": 0.29825338244840593, "learning_rate": 1.090345615620677e-06, "loss": 0.1865, "step": 10704 }, { "epoch": 0.8545541630079029, "grad_norm": 0.2977491262889198, "learning_rate": 1.0891719306102033e-06, "loss": 0.1672, "step": 10705 }, { "epoch": 0.8546339905803465, "grad_norm": 0.2569534754896379, "learning_rate": 1.0879988412533193e-06, "loss": 0.1642, "step": 10706 }, { "epoch": 0.85471381815279, "grad_norm": 0.29650432565144164, "learning_rate": 1.0868263476284458e-06, "loss": 0.1562, "step": 10707 }, { "epoch": 0.8547936457252335, "grad_norm": 0.23888248247468188, "learning_rate": 1.0856544498139587e-06, "loss": 0.1501, "step": 10708 }, { "epoch": 0.854873473297677, "grad_norm": 0.2532383472802138, "learning_rate": 1.0844831478881934e-06, "loss": 0.1435, "step": 10709 }, { "epoch": 0.8549533008701206, "grad_norm": 0.2945310057095259, "learning_rate": 1.0833124419294471e-06, "loss": 0.1132, "step": 10710 }, { "epoch": 0.8550331284425641, "grad_norm": 0.2922704677615368, "learning_rate": 1.082142332015974e-06, "loss": 0.1505, "step": 10711 }, { "epoch": 0.8551129560150076, "grad_norm": 0.26093165330615825, "learning_rate": 1.0809728182259981e-06, "loss": 0.1644, "step": 10712 }, { "epoch": 0.8551927835874511, "grad_norm": 0.26747390357031076, "learning_rate": 1.0798039006376915e-06, "loss": 0.1632, "step": 10713 }, { "epoch": 0.8552726111598946, "grad_norm": 0.2497760377795033, "learning_rate": 1.0786355793291946e-06, "loss": 0.126, "step": 10714 }, { "epoch": 0.8553524387323381, "grad_norm": 0.30007543146597926, "learning_rate": 1.0774678543786e-06, "loss": 0.1455, "step": 10715 }, { "epoch": 0.8554322663047816, "grad_norm": 0.2605808914534761, "learning_rate": 1.0763007258639723e-06, "loss": 0.1657, "step": 10716 }, { "epoch": 0.8555120938772252, "grad_norm": 0.31805647737996245, "learning_rate": 1.0751341938633253e-06, "loss": 0.1913, "step": 10717 }, { "epoch": 0.8555919214496687, "grad_norm": 0.30552182123315963, "learning_rate": 1.0739682584546395e-06, "loss": 0.1419, "step": 10718 }, { "epoch": 0.8556717490221122, "grad_norm": 0.2651460607816289, "learning_rate": 1.0728029197158496e-06, "loss": 0.1514, "step": 10719 }, { "epoch": 0.8557515765945558, "grad_norm": 0.32977236221719747, "learning_rate": 1.0716381777248552e-06, "loss": 0.1444, "step": 10720 }, { "epoch": 0.8558314041669993, "grad_norm": 0.27044742310219544, "learning_rate": 1.0704740325595176e-06, "loss": 0.1439, "step": 10721 }, { "epoch": 0.8559112317394428, "grad_norm": 0.26364625868678987, "learning_rate": 1.069310484297652e-06, "loss": 0.1279, "step": 10722 }, { "epoch": 0.8559910593118863, "grad_norm": 0.2858315738640899, "learning_rate": 1.0681475330170376e-06, "loss": 0.1372, "step": 10723 }, { "epoch": 0.8560708868843299, "grad_norm": 0.2997484517905018, "learning_rate": 1.0669851787954121e-06, "loss": 0.1116, "step": 10724 }, { "epoch": 0.8561507144567734, "grad_norm": 0.2892867121069047, "learning_rate": 1.0658234217104791e-06, "loss": 0.1276, "step": 10725 }, { "epoch": 0.8562305420292169, "grad_norm": 0.3350767950254435, "learning_rate": 1.0646622618398939e-06, "loss": 0.1395, "step": 10726 }, { "epoch": 0.8563103696016604, "grad_norm": 0.34083824996055195, "learning_rate": 1.0635016992612724e-06, "loss": 0.1964, "step": 10727 }, { "epoch": 0.8563901971741039, "grad_norm": 0.28166964635841846, "learning_rate": 1.062341734052199e-06, "loss": 0.1383, "step": 10728 }, { "epoch": 0.8564700247465474, "grad_norm": 0.3005874850044043, "learning_rate": 1.0611823662902088e-06, "loss": 0.1929, "step": 10729 }, { "epoch": 0.856549852318991, "grad_norm": 0.2818704245366088, "learning_rate": 1.0600235960528039e-06, "loss": 0.1245, "step": 10730 }, { "epoch": 0.8566296798914345, "grad_norm": 0.28296218288351843, "learning_rate": 1.0588654234174434e-06, "loss": 0.1607, "step": 10731 }, { "epoch": 0.856709507463878, "grad_norm": 0.3079752284802264, "learning_rate": 1.0577078484615444e-06, "loss": 0.1302, "step": 10732 }, { "epoch": 0.8567893350363216, "grad_norm": 0.28187147203950735, "learning_rate": 1.0565508712624883e-06, "loss": 0.1701, "step": 10733 }, { "epoch": 0.8568691626087651, "grad_norm": 0.2567693976786608, "learning_rate": 1.055394491897611e-06, "loss": 0.15, "step": 10734 }, { "epoch": 0.8569489901812086, "grad_norm": 0.30399382790646057, "learning_rate": 1.0542387104442186e-06, "loss": 0.1391, "step": 10735 }, { "epoch": 0.8570288177536521, "grad_norm": 0.2872788224632571, "learning_rate": 1.0530835269795613e-06, "loss": 0.1805, "step": 10736 }, { "epoch": 0.8571086453260957, "grad_norm": 0.40655701186218196, "learning_rate": 1.0519289415808665e-06, "loss": 0.1781, "step": 10737 }, { "epoch": 0.8571884728985392, "grad_norm": 0.28282635572508047, "learning_rate": 1.0507749543253087e-06, "loss": 0.1795, "step": 10738 }, { "epoch": 0.8572683004709827, "grad_norm": 0.36134702784360523, "learning_rate": 1.0496215652900333e-06, "loss": 0.157, "step": 10739 }, { "epoch": 0.8573481280434262, "grad_norm": 0.28789669572294785, "learning_rate": 1.0484687745521327e-06, "loss": 0.1661, "step": 10740 }, { "epoch": 0.8574279556158697, "grad_norm": 0.32620796737089996, "learning_rate": 1.0473165821886689e-06, "loss": 0.1512, "step": 10741 }, { "epoch": 0.8575077831883132, "grad_norm": 0.2690133043709523, "learning_rate": 1.0461649882766633e-06, "loss": 0.1424, "step": 10742 }, { "epoch": 0.8575876107607567, "grad_norm": 0.29103725293860244, "learning_rate": 1.0450139928930947e-06, "loss": 0.1175, "step": 10743 }, { "epoch": 0.8576674383332002, "grad_norm": 0.2432578162573162, "learning_rate": 1.0438635961149012e-06, "loss": 0.1448, "step": 10744 }, { "epoch": 0.8577472659056438, "grad_norm": 0.28467986931265604, "learning_rate": 1.0427137980189816e-06, "loss": 0.1516, "step": 10745 }, { "epoch": 0.8578270934780873, "grad_norm": 0.26181173491104065, "learning_rate": 1.0415645986821987e-06, "loss": 0.1294, "step": 10746 }, { "epoch": 0.8579069210505309, "grad_norm": 0.3329419864241477, "learning_rate": 1.04041599818137e-06, "loss": 0.1722, "step": 10747 }, { "epoch": 0.8579867486229744, "grad_norm": 0.25723789416275183, "learning_rate": 1.0392679965932751e-06, "loss": 0.1214, "step": 10748 }, { "epoch": 0.8580665761954179, "grad_norm": 0.3515617391781194, "learning_rate": 1.0381205939946538e-06, "loss": 0.1711, "step": 10749 }, { "epoch": 0.8581464037678614, "grad_norm": 0.2826884587869044, "learning_rate": 1.0369737904622013e-06, "loss": 0.1556, "step": 10750 }, { "epoch": 0.858226231340305, "grad_norm": 0.2524953909730412, "learning_rate": 1.0358275860725842e-06, "loss": 0.1232, "step": 10751 }, { "epoch": 0.8583060589127485, "grad_norm": 0.2626071329454598, "learning_rate": 1.0346819809024167e-06, "loss": 0.1707, "step": 10752 }, { "epoch": 0.858385886485192, "grad_norm": 0.2889616312360596, "learning_rate": 1.0335369750282798e-06, "loss": 0.1698, "step": 10753 }, { "epoch": 0.8584657140576355, "grad_norm": 0.26941783952379383, "learning_rate": 1.0323925685267122e-06, "loss": 0.1558, "step": 10754 }, { "epoch": 0.858545541630079, "grad_norm": 0.34041365268674345, "learning_rate": 1.0312487614742107e-06, "loss": 0.175, "step": 10755 }, { "epoch": 0.8586253692025225, "grad_norm": 0.3159182241026963, "learning_rate": 1.0301055539472372e-06, "loss": 0.1772, "step": 10756 }, { "epoch": 0.858705196774966, "grad_norm": 0.2834089098095544, "learning_rate": 1.0289629460222105e-06, "loss": 0.1137, "step": 10757 }, { "epoch": 0.8587850243474096, "grad_norm": 0.25746266376935467, "learning_rate": 1.0278209377755099e-06, "loss": 0.1619, "step": 10758 }, { "epoch": 0.8588648519198531, "grad_norm": 0.29091963316032704, "learning_rate": 1.0266795292834698e-06, "loss": 0.154, "step": 10759 }, { "epoch": 0.8589446794922967, "grad_norm": 0.3247223126893447, "learning_rate": 1.0255387206223933e-06, "loss": 0.1446, "step": 10760 }, { "epoch": 0.8590245070647402, "grad_norm": 0.30438835379790136, "learning_rate": 1.0243985118685384e-06, "loss": 0.1489, "step": 10761 }, { "epoch": 0.8591043346371837, "grad_norm": 0.29812868682120236, "learning_rate": 1.0232589030981232e-06, "loss": 0.1403, "step": 10762 }, { "epoch": 0.8591841622096272, "grad_norm": 0.2893904314642552, "learning_rate": 1.0221198943873245e-06, "loss": 0.1551, "step": 10763 }, { "epoch": 0.8592639897820707, "grad_norm": 0.34554934076730276, "learning_rate": 1.0209814858122802e-06, "loss": 0.1678, "step": 10764 }, { "epoch": 0.8593438173545143, "grad_norm": 0.27085058041068577, "learning_rate": 1.0198436774490917e-06, "loss": 0.139, "step": 10765 }, { "epoch": 0.8594236449269578, "grad_norm": 0.2842103824583102, "learning_rate": 1.0187064693738157e-06, "loss": 0.1446, "step": 10766 }, { "epoch": 0.8595034724994013, "grad_norm": 0.2888380214109782, "learning_rate": 1.0175698616624685e-06, "loss": 0.158, "step": 10767 }, { "epoch": 0.8595833000718448, "grad_norm": 0.2846906540947477, "learning_rate": 1.016433854391028e-06, "loss": 0.1719, "step": 10768 }, { "epoch": 0.8596631276442883, "grad_norm": 0.2795643538912717, "learning_rate": 1.0152984476354333e-06, "loss": 0.1561, "step": 10769 }, { "epoch": 0.8597429552167318, "grad_norm": 0.30917078941048554, "learning_rate": 1.0141636414715838e-06, "loss": 0.1728, "step": 10770 }, { "epoch": 0.8598227827891753, "grad_norm": 0.2930402052166647, "learning_rate": 1.013029435975329e-06, "loss": 0.1298, "step": 10771 }, { "epoch": 0.8599026103616189, "grad_norm": 0.30721959515780134, "learning_rate": 1.0118958312224946e-06, "loss": 0.1597, "step": 10772 }, { "epoch": 0.8599824379340624, "grad_norm": 0.288386408589203, "learning_rate": 1.0107628272888514e-06, "loss": 0.1614, "step": 10773 }, { "epoch": 0.860062265506506, "grad_norm": 0.24426885880658145, "learning_rate": 1.009630424250141e-06, "loss": 0.1733, "step": 10774 }, { "epoch": 0.8601420930789495, "grad_norm": 0.3125977865635434, "learning_rate": 1.0084986221820592e-06, "loss": 0.1492, "step": 10775 }, { "epoch": 0.860221920651393, "grad_norm": 0.3046422992125657, "learning_rate": 1.0073674211602603e-06, "loss": 0.1692, "step": 10776 }, { "epoch": 0.8603017482238365, "grad_norm": 0.2885611697328577, "learning_rate": 1.0062368212603613e-06, "loss": 0.1181, "step": 10777 }, { "epoch": 0.86038157579628, "grad_norm": 0.27725285508531006, "learning_rate": 1.0051068225579375e-06, "loss": 0.154, "step": 10778 }, { "epoch": 0.8604614033687236, "grad_norm": 0.28680583307311325, "learning_rate": 1.0039774251285294e-06, "loss": 0.107, "step": 10779 }, { "epoch": 0.8605412309411671, "grad_norm": 0.2814987365994764, "learning_rate": 1.0028486290476258e-06, "loss": 0.1612, "step": 10780 }, { "epoch": 0.8606210585136106, "grad_norm": 0.25882071284123115, "learning_rate": 1.0017204343906873e-06, "loss": 0.1369, "step": 10781 }, { "epoch": 0.8607008860860541, "grad_norm": 0.29448746989231345, "learning_rate": 1.0005928412331257e-06, "loss": 0.1477, "step": 10782 }, { "epoch": 0.8607807136584976, "grad_norm": 0.3191083828256885, "learning_rate": 9.99465849650323e-07, "loss": 0.1769, "step": 10783 }, { "epoch": 0.8608605412309411, "grad_norm": 0.2904137882057262, "learning_rate": 9.983394597176067e-07, "loss": 0.1392, "step": 10784 }, { "epoch": 0.8609403688033846, "grad_norm": 0.31135348549342123, "learning_rate": 9.97213671510272e-07, "loss": 0.1571, "step": 10785 }, { "epoch": 0.8610201963758282, "grad_norm": 0.28448136301460175, "learning_rate": 9.96088485103579e-07, "loss": 0.1629, "step": 10786 }, { "epoch": 0.8611000239482718, "grad_norm": 0.32356646865051447, "learning_rate": 9.949639005727373e-07, "loss": 0.1423, "step": 10787 }, { "epoch": 0.8611798515207153, "grad_norm": 0.271002100126203, "learning_rate": 9.938399179929214e-07, "loss": 0.1636, "step": 10788 }, { "epoch": 0.8612596790931588, "grad_norm": 0.2695917825882619, "learning_rate": 9.927165374392656e-07, "loss": 0.1613, "step": 10789 }, { "epoch": 0.8613395066656023, "grad_norm": 0.27240927058886183, "learning_rate": 9.91593758986864e-07, "loss": 0.1542, "step": 10790 }, { "epoch": 0.8614193342380458, "grad_norm": 0.2727465307994771, "learning_rate": 9.904715827107702e-07, "loss": 0.1404, "step": 10791 }, { "epoch": 0.8614991618104894, "grad_norm": 0.33616663227295224, "learning_rate": 9.893500086859965e-07, "loss": 0.1435, "step": 10792 }, { "epoch": 0.8615789893829329, "grad_norm": 0.2867903211415325, "learning_rate": 9.88229036987516e-07, "loss": 0.1451, "step": 10793 }, { "epoch": 0.8616588169553764, "grad_norm": 0.31029483589277523, "learning_rate": 9.871086676902586e-07, "loss": 0.1734, "step": 10794 }, { "epoch": 0.8617386445278199, "grad_norm": 0.2979104344862064, "learning_rate": 9.859889008691214e-07, "loss": 0.1451, "step": 10795 }, { "epoch": 0.8618184721002634, "grad_norm": 0.3060243022801825, "learning_rate": 9.848697365989546e-07, "loss": 0.1759, "step": 10796 }, { "epoch": 0.8618982996727069, "grad_norm": 0.2746213131867574, "learning_rate": 9.837511749545681e-07, "loss": 0.1196, "step": 10797 }, { "epoch": 0.8619781272451504, "grad_norm": 0.2874558834697722, "learning_rate": 9.82633216010732e-07, "loss": 0.1681, "step": 10798 }, { "epoch": 0.862057954817594, "grad_norm": 0.2614045836253805, "learning_rate": 9.815158598421825e-07, "loss": 0.1515, "step": 10799 }, { "epoch": 0.8621377823900375, "grad_norm": 0.3331429715063519, "learning_rate": 9.803991065236086e-07, "loss": 0.2065, "step": 10800 }, { "epoch": 0.8622176099624811, "grad_norm": 0.27392439486711245, "learning_rate": 9.79282956129659e-07, "loss": 0.1855, "step": 10801 }, { "epoch": 0.8622974375349246, "grad_norm": 0.29685126687581015, "learning_rate": 9.781674087349457e-07, "loss": 0.1374, "step": 10802 }, { "epoch": 0.8623772651073681, "grad_norm": 0.3342523721668266, "learning_rate": 9.770524644140355e-07, "loss": 0.1314, "step": 10803 }, { "epoch": 0.8624570926798116, "grad_norm": 0.2786336415043098, "learning_rate": 9.759381232414622e-07, "loss": 0.1459, "step": 10804 }, { "epoch": 0.8625369202522551, "grad_norm": 0.28205891148673284, "learning_rate": 9.74824385291714e-07, "loss": 0.1438, "step": 10805 }, { "epoch": 0.8626167478246987, "grad_norm": 0.35280961564723834, "learning_rate": 9.737112506392389e-07, "loss": 0.1326, "step": 10806 }, { "epoch": 0.8626965753971422, "grad_norm": 0.29671512842125336, "learning_rate": 9.725987193584463e-07, "loss": 0.1377, "step": 10807 }, { "epoch": 0.8627764029695857, "grad_norm": 0.26216270758192467, "learning_rate": 9.714867915237014e-07, "loss": 0.1806, "step": 10808 }, { "epoch": 0.8628562305420292, "grad_norm": 0.3208416593912079, "learning_rate": 9.703754672093369e-07, "loss": 0.1651, "step": 10809 }, { "epoch": 0.8629360581144727, "grad_norm": 0.2636184192349328, "learning_rate": 9.692647464896388e-07, "loss": 0.1533, "step": 10810 }, { "epoch": 0.8630158856869162, "grad_norm": 0.2956201304368363, "learning_rate": 9.681546294388532e-07, "loss": 0.1608, "step": 10811 }, { "epoch": 0.8630957132593597, "grad_norm": 0.2881812687750353, "learning_rate": 9.670451161311868e-07, "loss": 0.1647, "step": 10812 }, { "epoch": 0.8631755408318033, "grad_norm": 0.2765575936256004, "learning_rate": 9.659362066408084e-07, "loss": 0.1309, "step": 10813 }, { "epoch": 0.8632553684042469, "grad_norm": 0.2717748975656399, "learning_rate": 9.64827901041846e-07, "loss": 0.1612, "step": 10814 }, { "epoch": 0.8633351959766904, "grad_norm": 0.2697263342430618, "learning_rate": 9.637201994083778e-07, "loss": 0.1585, "step": 10815 }, { "epoch": 0.8634150235491339, "grad_norm": 0.2692701583594726, "learning_rate": 9.626131018144557e-07, "loss": 0.1786, "step": 10816 }, { "epoch": 0.8634948511215774, "grad_norm": 0.28978004974885885, "learning_rate": 9.615066083340807e-07, "loss": 0.1793, "step": 10817 }, { "epoch": 0.8635746786940209, "grad_norm": 0.3054809076610353, "learning_rate": 9.604007190412246e-07, "loss": 0.1573, "step": 10818 }, { "epoch": 0.8636545062664645, "grad_norm": 0.30900646203612553, "learning_rate": 9.592954340098015e-07, "loss": 0.1592, "step": 10819 }, { "epoch": 0.863734333838908, "grad_norm": 0.27436646867685227, "learning_rate": 9.581907533137048e-07, "loss": 0.1839, "step": 10820 }, { "epoch": 0.8638141614113515, "grad_norm": 0.28912784174157596, "learning_rate": 9.570866770267728e-07, "loss": 0.1519, "step": 10821 }, { "epoch": 0.863893988983795, "grad_norm": 0.2891924856429099, "learning_rate": 9.55983205222808e-07, "loss": 0.132, "step": 10822 }, { "epoch": 0.8639738165562385, "grad_norm": 0.24518421620000663, "learning_rate": 9.54880337975579e-07, "loss": 0.1277, "step": 10823 }, { "epoch": 0.864053644128682, "grad_norm": 0.2905356730711242, "learning_rate": 9.537780753588e-07, "loss": 0.1795, "step": 10824 }, { "epoch": 0.8641334717011255, "grad_norm": 0.33335121173246246, "learning_rate": 9.526764174461578e-07, "loss": 0.2144, "step": 10825 }, { "epoch": 0.864213299273569, "grad_norm": 0.2987804002542647, "learning_rate": 9.515753643112924e-07, "loss": 0.1646, "step": 10826 }, { "epoch": 0.8642931268460126, "grad_norm": 0.26630968496356583, "learning_rate": 9.504749160278093e-07, "loss": 0.1586, "step": 10827 }, { "epoch": 0.8643729544184562, "grad_norm": 0.29547623274538304, "learning_rate": 9.493750726692608e-07, "loss": 0.1751, "step": 10828 }, { "epoch": 0.8644527819908997, "grad_norm": 0.30446420618456077, "learning_rate": 9.482758343091746e-07, "loss": 0.163, "step": 10829 }, { "epoch": 0.8645326095633432, "grad_norm": 0.298191486708649, "learning_rate": 9.471772010210267e-07, "loss": 0.1768, "step": 10830 }, { "epoch": 0.8646124371357867, "grad_norm": 0.28528074999999586, "learning_rate": 9.46079172878257e-07, "loss": 0.1373, "step": 10831 }, { "epoch": 0.8646922647082302, "grad_norm": 0.27803547831560005, "learning_rate": 9.449817499542635e-07, "loss": 0.1648, "step": 10832 }, { "epoch": 0.8647720922806738, "grad_norm": 0.3247590991755826, "learning_rate": 9.438849323224053e-07, "loss": 0.1645, "step": 10833 }, { "epoch": 0.8648519198531173, "grad_norm": 0.3109409434555376, "learning_rate": 9.427887200560015e-07, "loss": 0.2004, "step": 10834 }, { "epoch": 0.8649317474255608, "grad_norm": 0.2721162393559152, "learning_rate": 9.41693113228328e-07, "loss": 0.1427, "step": 10835 }, { "epoch": 0.8650115749980043, "grad_norm": 0.24298922798863706, "learning_rate": 9.405981119126228e-07, "loss": 0.125, "step": 10836 }, { "epoch": 0.8650914025704478, "grad_norm": 0.22660970964078836, "learning_rate": 9.395037161820831e-07, "loss": 0.1764, "step": 10837 }, { "epoch": 0.8651712301428913, "grad_norm": 0.2967230380295368, "learning_rate": 9.384099261098601e-07, "loss": 0.185, "step": 10838 }, { "epoch": 0.8652510577153348, "grad_norm": 0.28911404585814193, "learning_rate": 9.373167417690765e-07, "loss": 0.1651, "step": 10839 }, { "epoch": 0.8653308852877784, "grad_norm": 0.2550329137603258, "learning_rate": 9.362241632328029e-07, "loss": 0.2105, "step": 10840 }, { "epoch": 0.865410712860222, "grad_norm": 0.2674185700996784, "learning_rate": 9.351321905740751e-07, "loss": 0.195, "step": 10841 }, { "epoch": 0.8654905404326655, "grad_norm": 0.29323534432838805, "learning_rate": 9.340408238658838e-07, "loss": 0.1661, "step": 10842 }, { "epoch": 0.865570368005109, "grad_norm": 0.29877769149782124, "learning_rate": 9.329500631811894e-07, "loss": 0.1732, "step": 10843 }, { "epoch": 0.8656501955775525, "grad_norm": 0.3022951020271397, "learning_rate": 9.318599085929004e-07, "loss": 0.2119, "step": 10844 }, { "epoch": 0.865730023149996, "grad_norm": 0.2582999466924046, "learning_rate": 9.307703601738893e-07, "loss": 0.1765, "step": 10845 }, { "epoch": 0.8658098507224395, "grad_norm": 0.3297456170089647, "learning_rate": 9.296814179969893e-07, "loss": 0.1578, "step": 10846 }, { "epoch": 0.8658896782948831, "grad_norm": 0.26929240165411117, "learning_rate": 9.285930821349897e-07, "loss": 0.1791, "step": 10847 }, { "epoch": 0.8659695058673266, "grad_norm": 0.32993079364401473, "learning_rate": 9.275053526606448e-07, "loss": 0.167, "step": 10848 }, { "epoch": 0.8660493334397701, "grad_norm": 0.24946353227699292, "learning_rate": 9.264182296466639e-07, "loss": 0.1248, "step": 10849 }, { "epoch": 0.8661291610122136, "grad_norm": 0.26580931656996853, "learning_rate": 9.253317131657158e-07, "loss": 0.1819, "step": 10850 }, { "epoch": 0.8662089885846571, "grad_norm": 0.278142479722984, "learning_rate": 9.242458032904311e-07, "loss": 0.1706, "step": 10851 }, { "epoch": 0.8662888161571006, "grad_norm": 0.2638859406251513, "learning_rate": 9.231605000933952e-07, "loss": 0.1176, "step": 10852 }, { "epoch": 0.8663686437295441, "grad_norm": 0.29488027767855535, "learning_rate": 9.22075803647161e-07, "loss": 0.1512, "step": 10853 }, { "epoch": 0.8664484713019877, "grad_norm": 0.3173726520771776, "learning_rate": 9.209917140242341e-07, "loss": 0.1653, "step": 10854 }, { "epoch": 0.8665282988744313, "grad_norm": 0.2850281327090361, "learning_rate": 9.19908231297082e-07, "loss": 0.157, "step": 10855 }, { "epoch": 0.8666081264468748, "grad_norm": 0.2773592981086968, "learning_rate": 9.188253555381288e-07, "loss": 0.1511, "step": 10856 }, { "epoch": 0.8666879540193183, "grad_norm": 0.29841145145780706, "learning_rate": 9.177430868197646e-07, "loss": 0.1333, "step": 10857 }, { "epoch": 0.8667677815917618, "grad_norm": 0.32818908774618843, "learning_rate": 9.166614252143325e-07, "loss": 0.1663, "step": 10858 }, { "epoch": 0.8668476091642053, "grad_norm": 0.2880273680501322, "learning_rate": 9.15580370794138e-07, "loss": 0.1467, "step": 10859 }, { "epoch": 0.8669274367366488, "grad_norm": 0.3019224949540863, "learning_rate": 9.144999236314445e-07, "loss": 0.1684, "step": 10860 }, { "epoch": 0.8670072643090924, "grad_norm": 0.23891966046167537, "learning_rate": 9.134200837984742e-07, "loss": 0.1578, "step": 10861 }, { "epoch": 0.8670870918815359, "grad_norm": 0.27443196632810724, "learning_rate": 9.123408513674148e-07, "loss": 0.1562, "step": 10862 }, { "epoch": 0.8671669194539794, "grad_norm": 0.3199183556215462, "learning_rate": 9.112622264104032e-07, "loss": 0.1871, "step": 10863 }, { "epoch": 0.8672467470264229, "grad_norm": 0.26446291720895476, "learning_rate": 9.101842089995449e-07, "loss": 0.1999, "step": 10864 }, { "epoch": 0.8673265745988664, "grad_norm": 0.2990885085899791, "learning_rate": 9.091067992068981e-07, "loss": 0.1311, "step": 10865 }, { "epoch": 0.8674064021713099, "grad_norm": 0.2972914090755974, "learning_rate": 9.08029997104487e-07, "loss": 0.1624, "step": 10866 }, { "epoch": 0.8674862297437534, "grad_norm": 0.2891741107006466, "learning_rate": 9.069538027642921e-07, "loss": 0.1555, "step": 10867 }, { "epoch": 0.867566057316197, "grad_norm": 0.26644478455556336, "learning_rate": 9.05878216258247e-07, "loss": 0.1713, "step": 10868 }, { "epoch": 0.8676458848886406, "grad_norm": 0.2990088421184433, "learning_rate": 9.048032376582561e-07, "loss": 0.1922, "step": 10869 }, { "epoch": 0.8677257124610841, "grad_norm": 0.27077105813886654, "learning_rate": 9.037288670361721e-07, "loss": 0.1849, "step": 10870 }, { "epoch": 0.8678055400335276, "grad_norm": 0.26573568936825825, "learning_rate": 9.026551044638199e-07, "loss": 0.1978, "step": 10871 }, { "epoch": 0.8678853676059711, "grad_norm": 0.33602216566571735, "learning_rate": 9.015819500129685e-07, "loss": 0.1399, "step": 10872 }, { "epoch": 0.8679651951784146, "grad_norm": 0.2791037810234062, "learning_rate": 9.005094037553586e-07, "loss": 0.1119, "step": 10873 }, { "epoch": 0.8680450227508582, "grad_norm": 0.30492698173896404, "learning_rate": 8.994374657626847e-07, "loss": 0.1619, "step": 10874 }, { "epoch": 0.8681248503233017, "grad_norm": 0.32079319172655335, "learning_rate": 8.983661361066021e-07, "loss": 0.1594, "step": 10875 }, { "epoch": 0.8682046778957452, "grad_norm": 0.30580958436677763, "learning_rate": 8.972954148587243e-07, "loss": 0.1576, "step": 10876 }, { "epoch": 0.8682845054681887, "grad_norm": 0.3025097942562598, "learning_rate": 8.96225302090622e-07, "loss": 0.2093, "step": 10877 }, { "epoch": 0.8683643330406322, "grad_norm": 0.29134288131456765, "learning_rate": 8.951557978738335e-07, "loss": 0.1458, "step": 10878 }, { "epoch": 0.8684441606130757, "grad_norm": 0.2719099932097451, "learning_rate": 8.94086902279847e-07, "loss": 0.1373, "step": 10879 }, { "epoch": 0.8685239881855192, "grad_norm": 0.34492157988826555, "learning_rate": 8.930186153801168e-07, "loss": 0.1776, "step": 10880 }, { "epoch": 0.8686038157579627, "grad_norm": 0.28967789769496083, "learning_rate": 8.919509372460511e-07, "loss": 0.1696, "step": 10881 }, { "epoch": 0.8686836433304064, "grad_norm": 0.2847827964314577, "learning_rate": 8.908838679490184e-07, "loss": 0.1788, "step": 10882 }, { "epoch": 0.8687634709028499, "grad_norm": 0.299344223932755, "learning_rate": 8.898174075603539e-07, "loss": 0.1157, "step": 10883 }, { "epoch": 0.8688432984752934, "grad_norm": 0.3315955910433165, "learning_rate": 8.887515561513416e-07, "loss": 0.1765, "step": 10884 }, { "epoch": 0.8689231260477369, "grad_norm": 0.30757423961803293, "learning_rate": 8.876863137932312e-07, "loss": 0.1392, "step": 10885 }, { "epoch": 0.8690029536201804, "grad_norm": 0.24332896871539594, "learning_rate": 8.86621680557227e-07, "loss": 0.179, "step": 10886 }, { "epoch": 0.8690827811926239, "grad_norm": 0.29205910630310494, "learning_rate": 8.855576565145008e-07, "loss": 0.2038, "step": 10887 }, { "epoch": 0.8691626087650675, "grad_norm": 0.28191269457594675, "learning_rate": 8.844942417361746e-07, "loss": 0.1289, "step": 10888 }, { "epoch": 0.869242436337511, "grad_norm": 0.26935710140292335, "learning_rate": 8.834314362933349e-07, "loss": 0.0997, "step": 10889 }, { "epoch": 0.8693222639099545, "grad_norm": 0.30847418303427443, "learning_rate": 8.82369240257026e-07, "loss": 0.1244, "step": 10890 }, { "epoch": 0.869402091482398, "grad_norm": 0.2475439389139916, "learning_rate": 8.81307653698249e-07, "loss": 0.1599, "step": 10891 }, { "epoch": 0.8694819190548415, "grad_norm": 0.30261734634976734, "learning_rate": 8.802466766879703e-07, "loss": 0.1566, "step": 10892 }, { "epoch": 0.869561746627285, "grad_norm": 0.2420617030393648, "learning_rate": 8.791863092971109e-07, "loss": 0.174, "step": 10893 }, { "epoch": 0.8696415741997285, "grad_norm": 0.30674275798530726, "learning_rate": 8.781265515965509e-07, "loss": 0.1487, "step": 10894 }, { "epoch": 0.869721401772172, "grad_norm": 0.2935854398229349, "learning_rate": 8.770674036571314e-07, "loss": 0.1404, "step": 10895 }, { "epoch": 0.8698012293446157, "grad_norm": 0.3172042286084307, "learning_rate": 8.760088655496535e-07, "loss": 0.1499, "step": 10896 }, { "epoch": 0.8698810569170592, "grad_norm": 0.26383469170435947, "learning_rate": 8.749509373448761e-07, "loss": 0.1372, "step": 10897 }, { "epoch": 0.8699608844895027, "grad_norm": 0.30460609920633, "learning_rate": 8.738936191135161e-07, "loss": 0.1413, "step": 10898 }, { "epoch": 0.8700407120619462, "grad_norm": 0.2859155627117897, "learning_rate": 8.728369109262524e-07, "loss": 0.1583, "step": 10899 }, { "epoch": 0.8701205396343897, "grad_norm": 0.25673832686266923, "learning_rate": 8.717808128537186e-07, "loss": 0.1983, "step": 10900 }, { "epoch": 0.8702003672068332, "grad_norm": 0.2778135622639541, "learning_rate": 8.70725324966516e-07, "loss": 0.1184, "step": 10901 }, { "epoch": 0.8702801947792768, "grad_norm": 0.29679071764436515, "learning_rate": 8.696704473351969e-07, "loss": 0.1364, "step": 10902 }, { "epoch": 0.8703600223517203, "grad_norm": 0.29779951614774824, "learning_rate": 8.68616180030275e-07, "loss": 0.1559, "step": 10903 }, { "epoch": 0.8704398499241638, "grad_norm": 0.29825341973377706, "learning_rate": 8.675625231222262e-07, "loss": 0.1723, "step": 10904 }, { "epoch": 0.8705196774966073, "grad_norm": 0.2648501431877296, "learning_rate": 8.665094766814785e-07, "loss": 0.1186, "step": 10905 }, { "epoch": 0.8705995050690508, "grad_norm": 0.26269921281967507, "learning_rate": 8.654570407784313e-07, "loss": 0.1694, "step": 10906 }, { "epoch": 0.8706793326414943, "grad_norm": 0.2731718258791388, "learning_rate": 8.644052154834282e-07, "loss": 0.1259, "step": 10907 }, { "epoch": 0.8707591602139378, "grad_norm": 0.27883553441920067, "learning_rate": 8.633540008667851e-07, "loss": 0.1358, "step": 10908 }, { "epoch": 0.8708389877863815, "grad_norm": 0.3121751696971288, "learning_rate": 8.623033969987682e-07, "loss": 0.1339, "step": 10909 }, { "epoch": 0.870918815358825, "grad_norm": 0.24641041606736291, "learning_rate": 8.612534039496101e-07, "loss": 0.2076, "step": 10910 }, { "epoch": 0.8709986429312685, "grad_norm": 0.2850084830991866, "learning_rate": 8.602040217894946e-07, "loss": 0.1359, "step": 10911 }, { "epoch": 0.871078470503712, "grad_norm": 0.2799690820790861, "learning_rate": 8.591552505885681e-07, "loss": 0.1921, "step": 10912 }, { "epoch": 0.8711582980761555, "grad_norm": 0.31918717638139654, "learning_rate": 8.581070904169408e-07, "loss": 0.1895, "step": 10913 }, { "epoch": 0.871238125648599, "grad_norm": 0.2880786881747322, "learning_rate": 8.570595413446748e-07, "loss": 0.1532, "step": 10914 }, { "epoch": 0.8713179532210426, "grad_norm": 0.2617417073283453, "learning_rate": 8.560126034417993e-07, "loss": 0.1558, "step": 10915 }, { "epoch": 0.8713977807934861, "grad_norm": 0.32712143148658446, "learning_rate": 8.549662767782906e-07, "loss": 0.1982, "step": 10916 }, { "epoch": 0.8714776083659296, "grad_norm": 0.29236195559909306, "learning_rate": 8.539205614240975e-07, "loss": 0.1636, "step": 10917 }, { "epoch": 0.8715574359383731, "grad_norm": 0.3011700122657105, "learning_rate": 8.528754574491205e-07, "loss": 0.1674, "step": 10918 }, { "epoch": 0.8716372635108166, "grad_norm": 0.27468753259134215, "learning_rate": 8.518309649232204e-07, "loss": 0.1396, "step": 10919 }, { "epoch": 0.8717170910832601, "grad_norm": 0.31980344990287946, "learning_rate": 8.507870839162158e-07, "loss": 0.1481, "step": 10920 }, { "epoch": 0.8717969186557036, "grad_norm": 0.2727262811201991, "learning_rate": 8.497438144978865e-07, "loss": 0.1616, "step": 10921 }, { "epoch": 0.8718767462281471, "grad_norm": 0.2856416887167446, "learning_rate": 8.487011567379733e-07, "loss": 0.1697, "step": 10922 }, { "epoch": 0.8719565738005908, "grad_norm": 0.3001548799187548, "learning_rate": 8.476591107061727e-07, "loss": 0.1829, "step": 10923 }, { "epoch": 0.8720364013730343, "grad_norm": 0.3381364998444478, "learning_rate": 8.466176764721401e-07, "loss": 0.123, "step": 10924 }, { "epoch": 0.8721162289454778, "grad_norm": 0.28381515084645315, "learning_rate": 8.45576854105491e-07, "loss": 0.1428, "step": 10925 }, { "epoch": 0.8721960565179213, "grad_norm": 0.24945194907582713, "learning_rate": 8.445366436758029e-07, "loss": 0.1689, "step": 10926 }, { "epoch": 0.8722758840903648, "grad_norm": 0.2744572912468571, "learning_rate": 8.434970452526081e-07, "loss": 0.2101, "step": 10927 }, { "epoch": 0.8723557116628083, "grad_norm": 0.2921773882081666, "learning_rate": 8.424580589053988e-07, "loss": 0.1424, "step": 10928 }, { "epoch": 0.8724355392352519, "grad_norm": 0.2553863511068017, "learning_rate": 8.414196847036294e-07, "loss": 0.1816, "step": 10929 }, { "epoch": 0.8725153668076954, "grad_norm": 0.2952241079897165, "learning_rate": 8.403819227167064e-07, "loss": 0.1656, "step": 10930 }, { "epoch": 0.8725951943801389, "grad_norm": 0.26677696760513503, "learning_rate": 8.39344773014007e-07, "loss": 0.1564, "step": 10931 }, { "epoch": 0.8726750219525824, "grad_norm": 0.2510716098544401, "learning_rate": 8.383082356648553e-07, "loss": 0.1521, "step": 10932 }, { "epoch": 0.8727548495250259, "grad_norm": 0.27899700443215286, "learning_rate": 8.372723107385428e-07, "loss": 0.1691, "step": 10933 }, { "epoch": 0.8728346770974694, "grad_norm": 0.2905539213383835, "learning_rate": 8.362369983043139e-07, "loss": 0.2154, "step": 10934 }, { "epoch": 0.8729145046699129, "grad_norm": 0.24768936611323678, "learning_rate": 8.352022984313757e-07, "loss": 0.1552, "step": 10935 }, { "epoch": 0.8729943322423566, "grad_norm": 0.297530543546371, "learning_rate": 8.341682111888971e-07, "loss": 0.195, "step": 10936 }, { "epoch": 0.8730741598148001, "grad_norm": 0.24332322061562892, "learning_rate": 8.331347366460007e-07, "loss": 0.1608, "step": 10937 }, { "epoch": 0.8731539873872436, "grad_norm": 0.2796353035012095, "learning_rate": 8.321018748717691e-07, "loss": 0.1096, "step": 10938 }, { "epoch": 0.8732338149596871, "grad_norm": 0.30883131961810695, "learning_rate": 8.310696259352447e-07, "loss": 0.1235, "step": 10939 }, { "epoch": 0.8733136425321306, "grad_norm": 0.3307807469038333, "learning_rate": 8.300379899054323e-07, "loss": 0.1742, "step": 10940 }, { "epoch": 0.8733934701045741, "grad_norm": 0.3111217455756987, "learning_rate": 8.290069668512912e-07, "loss": 0.1448, "step": 10941 }, { "epoch": 0.8734732976770176, "grad_norm": 0.26834150102959886, "learning_rate": 8.279765568417398e-07, "loss": 0.1597, "step": 10942 }, { "epoch": 0.8735531252494612, "grad_norm": 0.2805301021555486, "learning_rate": 8.269467599456582e-07, "loss": 0.1524, "step": 10943 }, { "epoch": 0.8736329528219047, "grad_norm": 0.2936944961497081, "learning_rate": 8.259175762318827e-07, "loss": 0.1668, "step": 10944 }, { "epoch": 0.8737127803943482, "grad_norm": 0.2843303315427163, "learning_rate": 8.248890057692139e-07, "loss": 0.1635, "step": 10945 }, { "epoch": 0.8737926079667917, "grad_norm": 0.2625642145696223, "learning_rate": 8.238610486264043e-07, "loss": 0.1529, "step": 10946 }, { "epoch": 0.8738724355392352, "grad_norm": 0.26824548408263704, "learning_rate": 8.228337048721702e-07, "loss": 0.2161, "step": 10947 }, { "epoch": 0.8739522631116787, "grad_norm": 0.2990577134063311, "learning_rate": 8.218069745751844e-07, "loss": 0.1564, "step": 10948 }, { "epoch": 0.8740320906841222, "grad_norm": 0.28560466491949343, "learning_rate": 8.207808578040799e-07, "loss": 0.1377, "step": 10949 }, { "epoch": 0.8741119182565659, "grad_norm": 0.30299847996004725, "learning_rate": 8.197553546274517e-07, "loss": 0.2124, "step": 10950 }, { "epoch": 0.8741917458290094, "grad_norm": 0.3315944531819501, "learning_rate": 8.187304651138439e-07, "loss": 0.1644, "step": 10951 }, { "epoch": 0.8742715734014529, "grad_norm": 0.28616320728355177, "learning_rate": 8.177061893317728e-07, "loss": 0.1217, "step": 10952 }, { "epoch": 0.8743514009738964, "grad_norm": 0.27110958901562016, "learning_rate": 8.166825273497025e-07, "loss": 0.1424, "step": 10953 }, { "epoch": 0.8744312285463399, "grad_norm": 0.27774253172723185, "learning_rate": 8.156594792360673e-07, "loss": 0.1669, "step": 10954 }, { "epoch": 0.8745110561187834, "grad_norm": 0.27310762720892195, "learning_rate": 8.146370450592455e-07, "loss": 0.1529, "step": 10955 }, { "epoch": 0.874590883691227, "grad_norm": 0.24664366231080423, "learning_rate": 8.136152248875883e-07, "loss": 0.186, "step": 10956 }, { "epoch": 0.8746707112636705, "grad_norm": 0.32554364528868657, "learning_rate": 8.125940187893999e-07, "loss": 0.1722, "step": 10957 }, { "epoch": 0.874750538836114, "grad_norm": 0.29730576914782086, "learning_rate": 8.115734268329401e-07, "loss": 0.1374, "step": 10958 }, { "epoch": 0.8748303664085575, "grad_norm": 0.31820274891658296, "learning_rate": 8.105534490864386e-07, "loss": 0.1713, "step": 10959 }, { "epoch": 0.874910193981001, "grad_norm": 0.25277146663189176, "learning_rate": 8.09534085618069e-07, "loss": 0.1624, "step": 10960 }, { "epoch": 0.8749900215534445, "grad_norm": 0.3050630676628107, "learning_rate": 8.085153364959763e-07, "loss": 0.161, "step": 10961 }, { "epoch": 0.875069849125888, "grad_norm": 0.27306518875348107, "learning_rate": 8.074972017882565e-07, "loss": 0.1532, "step": 10962 }, { "epoch": 0.8751496766983317, "grad_norm": 0.2834588799648558, "learning_rate": 8.06479681562975e-07, "loss": 0.1718, "step": 10963 }, { "epoch": 0.8752295042707752, "grad_norm": 0.27912757284870104, "learning_rate": 8.054627758881417e-07, "loss": 0.1545, "step": 10964 }, { "epoch": 0.8753093318432187, "grad_norm": 0.2687881955467574, "learning_rate": 8.044464848317335e-07, "loss": 0.1565, "step": 10965 }, { "epoch": 0.8753891594156622, "grad_norm": 0.2716952757158436, "learning_rate": 8.034308084616893e-07, "loss": 0.1453, "step": 10966 }, { "epoch": 0.8754689869881057, "grad_norm": 0.2731991775317494, "learning_rate": 8.024157468459004e-07, "loss": 0.1661, "step": 10967 }, { "epoch": 0.8755488145605492, "grad_norm": 0.28220927539891333, "learning_rate": 8.014013000522202e-07, "loss": 0.1674, "step": 10968 }, { "epoch": 0.8756286421329927, "grad_norm": 0.26749483458733725, "learning_rate": 8.00387468148458e-07, "loss": 0.142, "step": 10969 }, { "epoch": 0.8757084697054363, "grad_norm": 0.29493115271804066, "learning_rate": 7.993742512023894e-07, "loss": 0.1512, "step": 10970 }, { "epoch": 0.8757882972778798, "grad_norm": 0.28466049627973766, "learning_rate": 7.983616492817414e-07, "loss": 0.1796, "step": 10971 }, { "epoch": 0.8758681248503233, "grad_norm": 0.2317379079958621, "learning_rate": 7.973496624542021e-07, "loss": 0.1913, "step": 10972 }, { "epoch": 0.8759479524227668, "grad_norm": 0.30986152223209185, "learning_rate": 7.963382907874184e-07, "loss": 0.1975, "step": 10973 }, { "epoch": 0.8760277799952103, "grad_norm": 0.27879572703210775, "learning_rate": 7.953275343489964e-07, "loss": 0.1311, "step": 10974 }, { "epoch": 0.8761076075676538, "grad_norm": 0.29455393003366814, "learning_rate": 7.943173932065029e-07, "loss": 0.1655, "step": 10975 }, { "epoch": 0.8761874351400973, "grad_norm": 0.2867488643031818, "learning_rate": 7.933078674274619e-07, "loss": 0.1562, "step": 10976 }, { "epoch": 0.876267262712541, "grad_norm": 0.2923069789981248, "learning_rate": 7.922989570793549e-07, "loss": 0.1826, "step": 10977 }, { "epoch": 0.8763470902849845, "grad_norm": 0.3663003220745416, "learning_rate": 7.912906622296235e-07, "loss": 0.1311, "step": 10978 }, { "epoch": 0.876426917857428, "grad_norm": 0.2771777877524369, "learning_rate": 7.90282982945667e-07, "loss": 0.1817, "step": 10979 }, { "epoch": 0.8765067454298715, "grad_norm": 0.29893955477767525, "learning_rate": 7.892759192948474e-07, "loss": 0.1692, "step": 10980 }, { "epoch": 0.876586573002315, "grad_norm": 0.36317026956992077, "learning_rate": 7.882694713444827e-07, "loss": 0.156, "step": 10981 }, { "epoch": 0.8766664005747585, "grad_norm": 0.33322909776342163, "learning_rate": 7.872636391618494e-07, "loss": 0.1458, "step": 10982 }, { "epoch": 0.876746228147202, "grad_norm": 0.2921990073676519, "learning_rate": 7.862584228141801e-07, "loss": 0.1491, "step": 10983 }, { "epoch": 0.8768260557196456, "grad_norm": 0.28240516562867074, "learning_rate": 7.852538223686757e-07, "loss": 0.1675, "step": 10984 }, { "epoch": 0.8769058832920891, "grad_norm": 0.2606785545287774, "learning_rate": 7.842498378924879e-07, "loss": 0.1999, "step": 10985 }, { "epoch": 0.8769857108645326, "grad_norm": 0.3343767408401015, "learning_rate": 7.832464694527242e-07, "loss": 0.1647, "step": 10986 }, { "epoch": 0.8770655384369761, "grad_norm": 0.29488142234523407, "learning_rate": 7.82243717116461e-07, "loss": 0.1481, "step": 10987 }, { "epoch": 0.8771453660094196, "grad_norm": 0.3148135230321034, "learning_rate": 7.812415809507256e-07, "loss": 0.1562, "step": 10988 }, { "epoch": 0.8772251935818631, "grad_norm": 0.3069824919098208, "learning_rate": 7.802400610225092e-07, "loss": 0.1491, "step": 10989 }, { "epoch": 0.8773050211543068, "grad_norm": 0.2785585110616724, "learning_rate": 7.792391573987579e-07, "loss": 0.1359, "step": 10990 }, { "epoch": 0.8773848487267503, "grad_norm": 0.28371433688282294, "learning_rate": 7.782388701463795e-07, "loss": 0.1687, "step": 10991 }, { "epoch": 0.8774646762991938, "grad_norm": 0.3127333564012756, "learning_rate": 7.772391993322359e-07, "loss": 0.1532, "step": 10992 }, { "epoch": 0.8775445038716373, "grad_norm": 0.27660494450988576, "learning_rate": 7.762401450231549e-07, "loss": 0.1706, "step": 10993 }, { "epoch": 0.8776243314440808, "grad_norm": 0.2660724188503042, "learning_rate": 7.752417072859208e-07, "loss": 0.15, "step": 10994 }, { "epoch": 0.8777041590165243, "grad_norm": 0.32256378414724585, "learning_rate": 7.742438861872681e-07, "loss": 0.1715, "step": 10995 }, { "epoch": 0.8777839865889678, "grad_norm": 0.2650575424802857, "learning_rate": 7.732466817939022e-07, "loss": 0.1542, "step": 10996 }, { "epoch": 0.8778638141614113, "grad_norm": 0.28838552170205284, "learning_rate": 7.722500941724809e-07, "loss": 0.1636, "step": 10997 }, { "epoch": 0.8779436417338549, "grad_norm": 0.2904727457572109, "learning_rate": 7.712541233896264e-07, "loss": 0.1668, "step": 10998 }, { "epoch": 0.8780234693062984, "grad_norm": 0.30607394609766897, "learning_rate": 7.702587695119068e-07, "loss": 0.142, "step": 10999 }, { "epoch": 0.8781032968787419, "grad_norm": 0.25412521498874097, "learning_rate": 7.692640326058654e-07, "loss": 0.1507, "step": 11000 }, { "epoch": 0.8781831244511854, "grad_norm": 0.32438132227170846, "learning_rate": 7.682699127379934e-07, "loss": 0.1424, "step": 11001 }, { "epoch": 0.8782629520236289, "grad_norm": 0.31105063698678914, "learning_rate": 7.672764099747443e-07, "loss": 0.1654, "step": 11002 }, { "epoch": 0.8783427795960724, "grad_norm": 0.296984772775277, "learning_rate": 7.662835243825284e-07, "loss": 0.1704, "step": 11003 }, { "epoch": 0.8784226071685161, "grad_norm": 0.2956976144724424, "learning_rate": 7.652912560277159e-07, "loss": 0.1881, "step": 11004 }, { "epoch": 0.8785024347409596, "grad_norm": 0.30066028480683366, "learning_rate": 7.642996049766393e-07, "loss": 0.1981, "step": 11005 }, { "epoch": 0.8785822623134031, "grad_norm": 0.29158178531040363, "learning_rate": 7.633085712955823e-07, "loss": 0.1146, "step": 11006 }, { "epoch": 0.8786620898858466, "grad_norm": 0.28395002864412516, "learning_rate": 7.623181550507974e-07, "loss": 0.1952, "step": 11007 }, { "epoch": 0.8787419174582901, "grad_norm": 0.24351753144021718, "learning_rate": 7.61328356308485e-07, "loss": 0.1324, "step": 11008 }, { "epoch": 0.8788217450307336, "grad_norm": 0.3005466252951182, "learning_rate": 7.603391751348089e-07, "loss": 0.1513, "step": 11009 }, { "epoch": 0.8789015726031771, "grad_norm": 0.25963394137509127, "learning_rate": 7.593506115958949e-07, "loss": 0.1186, "step": 11010 }, { "epoch": 0.8789814001756207, "grad_norm": 0.31004577426060026, "learning_rate": 7.583626657578235e-07, "loss": 0.136, "step": 11011 }, { "epoch": 0.8790612277480642, "grad_norm": 0.26604259509747197, "learning_rate": 7.573753376866356e-07, "loss": 0.1813, "step": 11012 }, { "epoch": 0.8791410553205077, "grad_norm": 0.2690574008482343, "learning_rate": 7.563886274483267e-07, "loss": 0.1937, "step": 11013 }, { "epoch": 0.8792208828929512, "grad_norm": 0.281343729343227, "learning_rate": 7.55402535108859e-07, "loss": 0.142, "step": 11014 }, { "epoch": 0.8793007104653947, "grad_norm": 0.30512441103748844, "learning_rate": 7.544170607341473e-07, "loss": 0.1916, "step": 11015 }, { "epoch": 0.8793805380378382, "grad_norm": 0.27922479436183584, "learning_rate": 7.534322043900666e-07, "loss": 0.1899, "step": 11016 }, { "epoch": 0.8794603656102818, "grad_norm": 0.285162601414377, "learning_rate": 7.5244796614245e-07, "loss": 0.1596, "step": 11017 }, { "epoch": 0.8795401931827254, "grad_norm": 0.2742169819707299, "learning_rate": 7.514643460570891e-07, "loss": 0.1465, "step": 11018 }, { "epoch": 0.8796200207551689, "grad_norm": 0.29136377623980547, "learning_rate": 7.504813441997382e-07, "loss": 0.1281, "step": 11019 }, { "epoch": 0.8796998483276124, "grad_norm": 0.2662094931807464, "learning_rate": 7.494989606361047e-07, "loss": 0.1372, "step": 11020 }, { "epoch": 0.8797796759000559, "grad_norm": 0.2753441247110037, "learning_rate": 7.485171954318582e-07, "loss": 0.1708, "step": 11021 }, { "epoch": 0.8798595034724994, "grad_norm": 0.3357674952710683, "learning_rate": 7.475360486526228e-07, "loss": 0.177, "step": 11022 }, { "epoch": 0.8799393310449429, "grad_norm": 0.2906169035336798, "learning_rate": 7.465555203639885e-07, "loss": 0.141, "step": 11023 }, { "epoch": 0.8800191586173864, "grad_norm": 0.29358467267559324, "learning_rate": 7.455756106314982e-07, "loss": 0.2214, "step": 11024 }, { "epoch": 0.88009898618983, "grad_norm": 0.27358194837854116, "learning_rate": 7.44596319520654e-07, "loss": 0.1413, "step": 11025 }, { "epoch": 0.8801788137622735, "grad_norm": 0.3060027176952087, "learning_rate": 7.436176470969192e-07, "loss": 0.1728, "step": 11026 }, { "epoch": 0.880258641334717, "grad_norm": 0.29444815691749304, "learning_rate": 7.426395934257114e-07, "loss": 0.1447, "step": 11027 }, { "epoch": 0.8803384689071605, "grad_norm": 0.30444662012582485, "learning_rate": 7.416621585724127e-07, "loss": 0.1717, "step": 11028 }, { "epoch": 0.880418296479604, "grad_norm": 0.30031587266108395, "learning_rate": 7.406853426023597e-07, "loss": 0.1409, "step": 11029 }, { "epoch": 0.8804981240520475, "grad_norm": 0.2558354507939066, "learning_rate": 7.397091455808481e-07, "loss": 0.1464, "step": 11030 }, { "epoch": 0.8805779516244912, "grad_norm": 0.2547278145755693, "learning_rate": 7.387335675731344e-07, "loss": 0.1389, "step": 11031 }, { "epoch": 0.8806577791969347, "grad_norm": 0.2982511434537415, "learning_rate": 7.377586086444277e-07, "loss": 0.152, "step": 11032 }, { "epoch": 0.8807376067693782, "grad_norm": 0.31634946356978655, "learning_rate": 7.36784268859907e-07, "loss": 0.1471, "step": 11033 }, { "epoch": 0.8808174343418217, "grad_norm": 0.33437745990058154, "learning_rate": 7.358105482846978e-07, "loss": 0.2274, "step": 11034 }, { "epoch": 0.8808972619142652, "grad_norm": 0.2642985747983287, "learning_rate": 7.348374469838915e-07, "loss": 0.1481, "step": 11035 }, { "epoch": 0.8809770894867087, "grad_norm": 0.283683911070726, "learning_rate": 7.338649650225349e-07, "loss": 0.1334, "step": 11036 }, { "epoch": 0.8810569170591522, "grad_norm": 0.23911197791424868, "learning_rate": 7.32893102465636e-07, "loss": 0.1348, "step": 11037 }, { "epoch": 0.8811367446315957, "grad_norm": 0.2809898078551553, "learning_rate": 7.319218593781618e-07, "loss": 0.1956, "step": 11038 }, { "epoch": 0.8812165722040393, "grad_norm": 0.29892763884000995, "learning_rate": 7.309512358250303e-07, "loss": 0.1584, "step": 11039 }, { "epoch": 0.8812963997764828, "grad_norm": 0.26909083202829037, "learning_rate": 7.299812318711274e-07, "loss": 0.1262, "step": 11040 }, { "epoch": 0.8813762273489263, "grad_norm": 0.29865395932132305, "learning_rate": 7.290118475812935e-07, "loss": 0.1769, "step": 11041 }, { "epoch": 0.8814560549213698, "grad_norm": 0.29211748244746716, "learning_rate": 7.280430830203311e-07, "loss": 0.1325, "step": 11042 }, { "epoch": 0.8815358824938133, "grad_norm": 0.256404475845909, "learning_rate": 7.270749382529929e-07, "loss": 0.1756, "step": 11043 }, { "epoch": 0.8816157100662568, "grad_norm": 0.2470931572368833, "learning_rate": 7.261074133439994e-07, "loss": 0.1703, "step": 11044 }, { "epoch": 0.8816955376387005, "grad_norm": 0.28242304813115804, "learning_rate": 7.251405083580243e-07, "loss": 0.1425, "step": 11045 }, { "epoch": 0.881775365211144, "grad_norm": 0.25176795519053924, "learning_rate": 7.241742233597027e-07, "loss": 0.1517, "step": 11046 }, { "epoch": 0.8818551927835875, "grad_norm": 0.29021731865967815, "learning_rate": 7.232085584136251e-07, "loss": 0.1121, "step": 11047 }, { "epoch": 0.881935020356031, "grad_norm": 0.2724276689582044, "learning_rate": 7.22243513584342e-07, "loss": 0.1458, "step": 11048 }, { "epoch": 0.8820148479284745, "grad_norm": 0.2977614847524137, "learning_rate": 7.212790889363652e-07, "loss": 0.1538, "step": 11049 }, { "epoch": 0.882094675500918, "grad_norm": 0.2654336322115175, "learning_rate": 7.203152845341599e-07, "loss": 0.1425, "step": 11050 }, { "epoch": 0.8821745030733615, "grad_norm": 0.2826281217766064, "learning_rate": 7.193521004421578e-07, "loss": 0.1827, "step": 11051 }, { "epoch": 0.882254330645805, "grad_norm": 0.3071723971331947, "learning_rate": 7.183895367247385e-07, "loss": 0.219, "step": 11052 }, { "epoch": 0.8823341582182486, "grad_norm": 0.2860928874903309, "learning_rate": 7.17427593446245e-07, "loss": 0.1818, "step": 11053 }, { "epoch": 0.8824139857906921, "grad_norm": 0.25199387776671534, "learning_rate": 7.164662706709846e-07, "loss": 0.1624, "step": 11054 }, { "epoch": 0.8824938133631356, "grad_norm": 0.2856558035329849, "learning_rate": 7.15505568463215e-07, "loss": 0.1461, "step": 11055 }, { "epoch": 0.8825736409355791, "grad_norm": 0.3279547951536009, "learning_rate": 7.145454868871548e-07, "loss": 0.1681, "step": 11056 }, { "epoch": 0.8826534685080226, "grad_norm": 0.27845753955507124, "learning_rate": 7.135860260069815e-07, "loss": 0.1202, "step": 11057 }, { "epoch": 0.8827332960804662, "grad_norm": 0.2904822060221809, "learning_rate": 7.126271858868328e-07, "loss": 0.1772, "step": 11058 }, { "epoch": 0.8828131236529098, "grad_norm": 0.2662342293930582, "learning_rate": 7.116689665908039e-07, "loss": 0.1621, "step": 11059 }, { "epoch": 0.8828929512253533, "grad_norm": 0.30705158788289005, "learning_rate": 7.107113681829459e-07, "loss": 0.1738, "step": 11060 }, { "epoch": 0.8829727787977968, "grad_norm": 0.305977012694153, "learning_rate": 7.097543907272719e-07, "loss": 0.1857, "step": 11061 }, { "epoch": 0.8830526063702403, "grad_norm": 0.3114539687549998, "learning_rate": 7.087980342877487e-07, "loss": 0.1618, "step": 11062 }, { "epoch": 0.8831324339426838, "grad_norm": 0.3080969035238955, "learning_rate": 7.078422989283107e-07, "loss": 0.1644, "step": 11063 }, { "epoch": 0.8832122615151273, "grad_norm": 0.258121173839766, "learning_rate": 7.068871847128412e-07, "loss": 0.1365, "step": 11064 }, { "epoch": 0.8832920890875708, "grad_norm": 0.27646119218006227, "learning_rate": 7.059326917051868e-07, "loss": 0.1531, "step": 11065 }, { "epoch": 0.8833719166600144, "grad_norm": 0.28155232117501405, "learning_rate": 7.049788199691499e-07, "loss": 0.1422, "step": 11066 }, { "epoch": 0.8834517442324579, "grad_norm": 0.2919200354083873, "learning_rate": 7.040255695684961e-07, "loss": 0.1391, "step": 11067 }, { "epoch": 0.8835315718049014, "grad_norm": 0.28664595730933, "learning_rate": 7.030729405669445e-07, "loss": 0.1543, "step": 11068 }, { "epoch": 0.8836113993773449, "grad_norm": 0.28027641139928516, "learning_rate": 7.021209330281753e-07, "loss": 0.1705, "step": 11069 }, { "epoch": 0.8836912269497884, "grad_norm": 0.3231271654729325, "learning_rate": 7.011695470158264e-07, "loss": 0.1352, "step": 11070 }, { "epoch": 0.8837710545222319, "grad_norm": 0.3280100228565056, "learning_rate": 7.002187825934915e-07, "loss": 0.1988, "step": 11071 }, { "epoch": 0.8838508820946755, "grad_norm": 0.27299135905266314, "learning_rate": 6.992686398247295e-07, "loss": 0.1529, "step": 11072 }, { "epoch": 0.8839307096671191, "grad_norm": 0.33576149610892253, "learning_rate": 6.983191187730531e-07, "loss": 0.1935, "step": 11073 }, { "epoch": 0.8840105372395626, "grad_norm": 0.3109411861840252, "learning_rate": 6.973702195019317e-07, "loss": 0.193, "step": 11074 }, { "epoch": 0.8840903648120061, "grad_norm": 0.271332068785458, "learning_rate": 6.964219420747976e-07, "loss": 0.1463, "step": 11075 }, { "epoch": 0.8841701923844496, "grad_norm": 0.30344021810630584, "learning_rate": 6.954742865550368e-07, "loss": 0.1903, "step": 11076 }, { "epoch": 0.8842500199568931, "grad_norm": 0.2607679800810928, "learning_rate": 6.94527253006001e-07, "loss": 0.128, "step": 11077 }, { "epoch": 0.8843298475293366, "grad_norm": 0.2856197921891649, "learning_rate": 6.935808414909883e-07, "loss": 0.1638, "step": 11078 }, { "epoch": 0.8844096751017801, "grad_norm": 0.27850633825947085, "learning_rate": 6.926350520732705e-07, "loss": 0.1529, "step": 11079 }, { "epoch": 0.8844895026742237, "grad_norm": 0.3251021561931058, "learning_rate": 6.916898848160635e-07, "loss": 0.1553, "step": 11080 }, { "epoch": 0.8845693302466672, "grad_norm": 0.29370759569092053, "learning_rate": 6.907453397825525e-07, "loss": 0.1598, "step": 11081 }, { "epoch": 0.8846491578191107, "grad_norm": 0.2876892899429544, "learning_rate": 6.898014170358769e-07, "loss": 0.1338, "step": 11082 }, { "epoch": 0.8847289853915542, "grad_norm": 0.2683396331695395, "learning_rate": 6.888581166391284e-07, "loss": 0.1821, "step": 11083 }, { "epoch": 0.8848088129639977, "grad_norm": 0.271863112426098, "learning_rate": 6.879154386553688e-07, "loss": 0.1377, "step": 11084 }, { "epoch": 0.8848886405364413, "grad_norm": 0.3067590371015945, "learning_rate": 6.869733831476089e-07, "loss": 0.1427, "step": 11085 }, { "epoch": 0.8849684681088849, "grad_norm": 0.33308111606313756, "learning_rate": 6.860319501788249e-07, "loss": 0.1646, "step": 11086 }, { "epoch": 0.8850482956813284, "grad_norm": 0.2777668178708105, "learning_rate": 6.850911398119431e-07, "loss": 0.1431, "step": 11087 }, { "epoch": 0.8851281232537719, "grad_norm": 0.27809830539232167, "learning_rate": 6.841509521098577e-07, "loss": 0.1535, "step": 11088 }, { "epoch": 0.8852079508262154, "grad_norm": 0.29256785631555526, "learning_rate": 6.832113871354118e-07, "loss": 0.1495, "step": 11089 }, { "epoch": 0.8852877783986589, "grad_norm": 0.29244648302687454, "learning_rate": 6.822724449514184e-07, "loss": 0.1625, "step": 11090 }, { "epoch": 0.8853676059711024, "grad_norm": 0.258751617046959, "learning_rate": 6.813341256206352e-07, "loss": 0.1559, "step": 11091 }, { "epoch": 0.8854474335435459, "grad_norm": 0.30709735284821715, "learning_rate": 6.803964292057874e-07, "loss": 0.1554, "step": 11092 }, { "epoch": 0.8855272611159895, "grad_norm": 0.2769785928274576, "learning_rate": 6.794593557695572e-07, "loss": 0.1337, "step": 11093 }, { "epoch": 0.885607088688433, "grad_norm": 0.3187804313470701, "learning_rate": 6.785229053745846e-07, "loss": 0.1517, "step": 11094 }, { "epoch": 0.8856869162608765, "grad_norm": 0.3092527199985826, "learning_rate": 6.775870780834659e-07, "loss": 0.1647, "step": 11095 }, { "epoch": 0.88576674383332, "grad_norm": 0.27063747628111967, "learning_rate": 6.76651873958758e-07, "loss": 0.144, "step": 11096 }, { "epoch": 0.8858465714057635, "grad_norm": 0.2837386702506524, "learning_rate": 6.757172930629763e-07, "loss": 0.1351, "step": 11097 }, { "epoch": 0.885926398978207, "grad_norm": 0.26496069940503914, "learning_rate": 6.747833354585942e-07, "loss": 0.1969, "step": 11098 }, { "epoch": 0.8860062265506506, "grad_norm": 0.3104780318875546, "learning_rate": 6.738500012080418e-07, "loss": 0.1397, "step": 11099 }, { "epoch": 0.8860860541230942, "grad_norm": 0.24939400646952606, "learning_rate": 6.729172903737091e-07, "loss": 0.1666, "step": 11100 }, { "epoch": 0.8861658816955377, "grad_norm": 0.27691597362696957, "learning_rate": 6.71985203017943e-07, "loss": 0.1474, "step": 11101 }, { "epoch": 0.8862457092679812, "grad_norm": 0.27445838598082867, "learning_rate": 6.710537392030536e-07, "loss": 0.1923, "step": 11102 }, { "epoch": 0.8863255368404247, "grad_norm": 0.2937929969719201, "learning_rate": 6.701228989913022e-07, "loss": 0.1212, "step": 11103 }, { "epoch": 0.8864053644128682, "grad_norm": 0.3587844176709067, "learning_rate": 6.691926824449135e-07, "loss": 0.1288, "step": 11104 }, { "epoch": 0.8864851919853117, "grad_norm": 0.2661909300799441, "learning_rate": 6.682630896260667e-07, "loss": 0.2083, "step": 11105 }, { "epoch": 0.8865650195577552, "grad_norm": 0.27397771637961066, "learning_rate": 6.673341205969019e-07, "loss": 0.1604, "step": 11106 }, { "epoch": 0.8866448471301988, "grad_norm": 0.2756613576991685, "learning_rate": 6.664057754195197e-07, "loss": 0.11, "step": 11107 }, { "epoch": 0.8867246747026423, "grad_norm": 0.2772945020655582, "learning_rate": 6.654780541559747e-07, "loss": 0.1476, "step": 11108 }, { "epoch": 0.8868045022750858, "grad_norm": 0.3480520747725593, "learning_rate": 6.645509568682806e-07, "loss": 0.1551, "step": 11109 }, { "epoch": 0.8868843298475293, "grad_norm": 0.23142133612103666, "learning_rate": 6.636244836184092e-07, "loss": 0.0987, "step": 11110 }, { "epoch": 0.8869641574199728, "grad_norm": 0.30972049438514426, "learning_rate": 6.626986344682951e-07, "loss": 0.163, "step": 11111 }, { "epoch": 0.8870439849924164, "grad_norm": 0.2717970304737993, "learning_rate": 6.617734094798245e-07, "loss": 0.1505, "step": 11112 }, { "epoch": 0.88712381256486, "grad_norm": 0.26699300865935544, "learning_rate": 6.608488087148468e-07, "loss": 0.1927, "step": 11113 }, { "epoch": 0.8872036401373035, "grad_norm": 0.28839471899449104, "learning_rate": 6.599248322351681e-07, "loss": 0.1335, "step": 11114 }, { "epoch": 0.887283467709747, "grad_norm": 0.27783045816196883, "learning_rate": 6.59001480102549e-07, "loss": 0.1732, "step": 11115 }, { "epoch": 0.8873632952821905, "grad_norm": 0.2782867932619908, "learning_rate": 6.580787523787169e-07, "loss": 0.1688, "step": 11116 }, { "epoch": 0.887443122854634, "grad_norm": 0.2713626848217634, "learning_rate": 6.571566491253501e-07, "loss": 0.1457, "step": 11117 }, { "epoch": 0.8875229504270775, "grad_norm": 0.2971048813630887, "learning_rate": 6.56235170404087e-07, "loss": 0.1268, "step": 11118 }, { "epoch": 0.887602777999521, "grad_norm": 0.2740213344047252, "learning_rate": 6.553143162765263e-07, "loss": 0.1483, "step": 11119 }, { "epoch": 0.8876826055719645, "grad_norm": 0.3368391775509618, "learning_rate": 6.543940868042209e-07, "loss": 0.1686, "step": 11120 }, { "epoch": 0.8877624331444081, "grad_norm": 0.32430314910809616, "learning_rate": 6.534744820486905e-07, "loss": 0.1563, "step": 11121 }, { "epoch": 0.8878422607168516, "grad_norm": 0.26124120258271855, "learning_rate": 6.52555502071398e-07, "loss": 0.149, "step": 11122 }, { "epoch": 0.8879220882892951, "grad_norm": 0.25950123243980777, "learning_rate": 6.516371469337812e-07, "loss": 0.1373, "step": 11123 }, { "epoch": 0.8880019158617386, "grad_norm": 0.29442144516347396, "learning_rate": 6.507194166972231e-07, "loss": 0.2037, "step": 11124 }, { "epoch": 0.8880817434341821, "grad_norm": 0.2777467146597261, "learning_rate": 6.498023114230756e-07, "loss": 0.2024, "step": 11125 }, { "epoch": 0.8881615710066257, "grad_norm": 0.3055124120255862, "learning_rate": 6.488858311726398e-07, "loss": 0.132, "step": 11126 }, { "epoch": 0.8882413985790693, "grad_norm": 0.2754706661009364, "learning_rate": 6.479699760071812e-07, "loss": 0.1507, "step": 11127 }, { "epoch": 0.8883212261515128, "grad_norm": 0.2681261271286147, "learning_rate": 6.470547459879184e-07, "loss": 0.1282, "step": 11128 }, { "epoch": 0.8884010537239563, "grad_norm": 0.27069352846088046, "learning_rate": 6.461401411760315e-07, "loss": 0.1363, "step": 11129 }, { "epoch": 0.8884808812963998, "grad_norm": 0.2724979547791815, "learning_rate": 6.452261616326627e-07, "loss": 0.1789, "step": 11130 }, { "epoch": 0.8885607088688433, "grad_norm": 0.26837446502185236, "learning_rate": 6.443128074188998e-07, "loss": 0.1559, "step": 11131 }, { "epoch": 0.8886405364412868, "grad_norm": 0.2772132213884309, "learning_rate": 6.434000785958038e-07, "loss": 0.2096, "step": 11132 }, { "epoch": 0.8887203640137303, "grad_norm": 0.27067222032457444, "learning_rate": 6.424879752243818e-07, "loss": 0.1655, "step": 11133 }, { "epoch": 0.8888001915861738, "grad_norm": 0.26956923372393465, "learning_rate": 6.415764973656113e-07, "loss": 0.1721, "step": 11134 }, { "epoch": 0.8888800191586174, "grad_norm": 0.24817394115270092, "learning_rate": 6.406656450804149e-07, "loss": 0.1081, "step": 11135 }, { "epoch": 0.8889598467310609, "grad_norm": 0.28817832163866014, "learning_rate": 6.397554184296794e-07, "loss": 0.1536, "step": 11136 }, { "epoch": 0.8890396743035044, "grad_norm": 0.27242569719850435, "learning_rate": 6.38845817474254e-07, "loss": 0.1893, "step": 11137 }, { "epoch": 0.8891195018759479, "grad_norm": 0.27595601528910285, "learning_rate": 6.379368422749388e-07, "loss": 0.1821, "step": 11138 }, { "epoch": 0.8891993294483915, "grad_norm": 0.31046730169485315, "learning_rate": 6.370284928924964e-07, "loss": 0.2063, "step": 11139 }, { "epoch": 0.889279157020835, "grad_norm": 0.2946342548872661, "learning_rate": 6.361207693876447e-07, "loss": 0.1266, "step": 11140 }, { "epoch": 0.8893589845932786, "grad_norm": 0.28411738314865986, "learning_rate": 6.352136718210644e-07, "loss": 0.1697, "step": 11141 }, { "epoch": 0.8894388121657221, "grad_norm": 0.26549054301996444, "learning_rate": 6.34307200253389e-07, "loss": 0.1497, "step": 11142 }, { "epoch": 0.8895186397381656, "grad_norm": 0.26535731570398263, "learning_rate": 6.334013547452133e-07, "loss": 0.1005, "step": 11143 }, { "epoch": 0.8895984673106091, "grad_norm": 0.27020956013810227, "learning_rate": 6.324961353570902e-07, "loss": 0.114, "step": 11144 }, { "epoch": 0.8896782948830526, "grad_norm": 0.274861496358227, "learning_rate": 6.315915421495278e-07, "loss": 0.1886, "step": 11145 }, { "epoch": 0.8897581224554961, "grad_norm": 0.2670501600980402, "learning_rate": 6.306875751829966e-07, "loss": 0.1422, "step": 11146 }, { "epoch": 0.8898379500279396, "grad_norm": 0.25733573875315396, "learning_rate": 6.297842345179239e-07, "loss": 0.1925, "step": 11147 }, { "epoch": 0.8899177776003832, "grad_norm": 0.2815537484336712, "learning_rate": 6.288815202146925e-07, "loss": 0.1718, "step": 11148 }, { "epoch": 0.8899976051728267, "grad_norm": 0.32821423009463496, "learning_rate": 6.279794323336452e-07, "loss": 0.2122, "step": 11149 }, { "epoch": 0.8900774327452702, "grad_norm": 0.2567032513461917, "learning_rate": 6.270779709350838e-07, "loss": 0.1569, "step": 11150 }, { "epoch": 0.8901572603177137, "grad_norm": 0.2646413960118905, "learning_rate": 6.261771360792679e-07, "loss": 0.1773, "step": 11151 }, { "epoch": 0.8902370878901572, "grad_norm": 0.28980948231063874, "learning_rate": 6.252769278264148e-07, "loss": 0.1395, "step": 11152 }, { "epoch": 0.8903169154626008, "grad_norm": 0.2588244484463684, "learning_rate": 6.243773462366997e-07, "loss": 0.1711, "step": 11153 }, { "epoch": 0.8903967430350443, "grad_norm": 0.22736423997468466, "learning_rate": 6.234783913702536e-07, "loss": 0.1387, "step": 11154 }, { "epoch": 0.8904765706074879, "grad_norm": 0.31093003779810446, "learning_rate": 6.225800632871715e-07, "loss": 0.1915, "step": 11155 }, { "epoch": 0.8905563981799314, "grad_norm": 0.29616412733927827, "learning_rate": 6.216823620475021e-07, "loss": 0.1837, "step": 11156 }, { "epoch": 0.8906362257523749, "grad_norm": 0.3149754439350284, "learning_rate": 6.20785287711253e-07, "loss": 0.1905, "step": 11157 }, { "epoch": 0.8907160533248184, "grad_norm": 0.2802250092892987, "learning_rate": 6.198888403383896e-07, "loss": 0.1454, "step": 11158 }, { "epoch": 0.8907958808972619, "grad_norm": 0.32643201024040813, "learning_rate": 6.18993019988835e-07, "loss": 0.1489, "step": 11159 }, { "epoch": 0.8908757084697054, "grad_norm": 0.2811268219512577, "learning_rate": 6.180978267224736e-07, "loss": 0.121, "step": 11160 }, { "epoch": 0.8909555360421489, "grad_norm": 0.294824832949167, "learning_rate": 6.172032605991441e-07, "loss": 0.1593, "step": 11161 }, { "epoch": 0.8910353636145925, "grad_norm": 0.2576822141611944, "learning_rate": 6.163093216786453e-07, "loss": 0.1543, "step": 11162 }, { "epoch": 0.891115191187036, "grad_norm": 0.2932941049608959, "learning_rate": 6.154160100207318e-07, "loss": 0.1931, "step": 11163 }, { "epoch": 0.8911950187594795, "grad_norm": 0.3132168961861948, "learning_rate": 6.145233256851202e-07, "loss": 0.1305, "step": 11164 }, { "epoch": 0.891274846331923, "grad_norm": 0.24975970511031434, "learning_rate": 6.136312687314839e-07, "loss": 0.1657, "step": 11165 }, { "epoch": 0.8913546739043666, "grad_norm": 0.2796491299976229, "learning_rate": 6.127398392194483e-07, "loss": 0.1869, "step": 11166 }, { "epoch": 0.8914345014768101, "grad_norm": 0.25858999121065684, "learning_rate": 6.118490372086072e-07, "loss": 0.1195, "step": 11167 }, { "epoch": 0.8915143290492537, "grad_norm": 0.2858179916118153, "learning_rate": 6.109588627585028e-07, "loss": 0.1828, "step": 11168 }, { "epoch": 0.8915941566216972, "grad_norm": 0.26395688571379444, "learning_rate": 6.100693159286453e-07, "loss": 0.1697, "step": 11169 }, { "epoch": 0.8916739841941407, "grad_norm": 0.3011380097037046, "learning_rate": 6.091803967784904e-07, "loss": 0.1438, "step": 11170 }, { "epoch": 0.8917538117665842, "grad_norm": 0.24402279153578013, "learning_rate": 6.08292105367465e-07, "loss": 0.1151, "step": 11171 }, { "epoch": 0.8918336393390277, "grad_norm": 0.2572604613416007, "learning_rate": 6.074044417549451e-07, "loss": 0.116, "step": 11172 }, { "epoch": 0.8919134669114712, "grad_norm": 0.29240683442774446, "learning_rate": 6.065174060002666e-07, "loss": 0.1956, "step": 11173 }, { "epoch": 0.8919932944839147, "grad_norm": 0.2940078057099678, "learning_rate": 6.056309981627284e-07, "loss": 0.1602, "step": 11174 }, { "epoch": 0.8920731220563582, "grad_norm": 0.2826504315837045, "learning_rate": 6.047452183015767e-07, "loss": 0.146, "step": 11175 }, { "epoch": 0.8921529496288018, "grad_norm": 0.30075276011874974, "learning_rate": 6.038600664760285e-07, "loss": 0.1597, "step": 11176 }, { "epoch": 0.8922327772012453, "grad_norm": 0.3259556244243292, "learning_rate": 6.029755427452488e-07, "loss": 0.1716, "step": 11177 }, { "epoch": 0.8923126047736888, "grad_norm": 0.2944443364917465, "learning_rate": 6.020916471683691e-07, "loss": 0.1584, "step": 11178 }, { "epoch": 0.8923924323461323, "grad_norm": 0.27359551614174416, "learning_rate": 6.012083798044699e-07, "loss": 0.1849, "step": 11179 }, { "epoch": 0.8924722599185759, "grad_norm": 0.2833608042543862, "learning_rate": 6.00325740712594e-07, "loss": 0.1932, "step": 11180 }, { "epoch": 0.8925520874910194, "grad_norm": 0.25131216614775403, "learning_rate": 5.994437299517442e-07, "loss": 0.1341, "step": 11181 }, { "epoch": 0.892631915063463, "grad_norm": 0.24426539896004576, "learning_rate": 5.985623475808799e-07, "loss": 0.2187, "step": 11182 }, { "epoch": 0.8927117426359065, "grad_norm": 0.22946214922751135, "learning_rate": 5.976815936589176e-07, "loss": 0.1244, "step": 11183 }, { "epoch": 0.89279157020835, "grad_norm": 0.2618720756065838, "learning_rate": 5.968014682447298e-07, "loss": 0.2062, "step": 11184 }, { "epoch": 0.8928713977807935, "grad_norm": 0.2587683739100522, "learning_rate": 5.959219713971532e-07, "loss": 0.1613, "step": 11185 }, { "epoch": 0.892951225353237, "grad_norm": 0.2639781039083238, "learning_rate": 5.950431031749771e-07, "loss": 0.1886, "step": 11186 }, { "epoch": 0.8930310529256805, "grad_norm": 0.3130295434436023, "learning_rate": 5.941648636369501e-07, "loss": 0.1864, "step": 11187 }, { "epoch": 0.893110880498124, "grad_norm": 0.24203832797362346, "learning_rate": 5.932872528417788e-07, "loss": 0.1836, "step": 11188 }, { "epoch": 0.8931907080705676, "grad_norm": 0.31542219533279325, "learning_rate": 5.924102708481272e-07, "loss": 0.1557, "step": 11189 }, { "epoch": 0.8932705356430111, "grad_norm": 0.3131350011946645, "learning_rate": 5.915339177146207e-07, "loss": 0.1624, "step": 11190 }, { "epoch": 0.8933503632154546, "grad_norm": 0.3294521187164605, "learning_rate": 5.906581934998379e-07, "loss": 0.1462, "step": 11191 }, { "epoch": 0.8934301907878981, "grad_norm": 0.2960793787565522, "learning_rate": 5.897830982623187e-07, "loss": 0.202, "step": 11192 }, { "epoch": 0.8935100183603416, "grad_norm": 0.23119344789507454, "learning_rate": 5.889086320605575e-07, "loss": 0.1259, "step": 11193 }, { "epoch": 0.8935898459327852, "grad_norm": 0.2796015295468506, "learning_rate": 5.880347949530118e-07, "loss": 0.1247, "step": 11194 }, { "epoch": 0.8936696735052287, "grad_norm": 0.31450234387357756, "learning_rate": 5.871615869980929e-07, "loss": 0.1303, "step": 11195 }, { "epoch": 0.8937495010776723, "grad_norm": 0.27726765233228384, "learning_rate": 5.862890082541717e-07, "loss": 0.1318, "step": 11196 }, { "epoch": 0.8938293286501158, "grad_norm": 0.29742406361079354, "learning_rate": 5.85417058779576e-07, "loss": 0.1611, "step": 11197 }, { "epoch": 0.8939091562225593, "grad_norm": 0.31256477769528035, "learning_rate": 5.845457386325915e-07, "loss": 0.1881, "step": 11198 }, { "epoch": 0.8939889837950028, "grad_norm": 0.2566035910843957, "learning_rate": 5.836750478714648e-07, "loss": 0.1468, "step": 11199 }, { "epoch": 0.8940688113674463, "grad_norm": 0.26867607705979596, "learning_rate": 5.828049865543961e-07, "loss": 0.18, "step": 11200 }, { "epoch": 0.8941486389398898, "grad_norm": 0.28255092125870473, "learning_rate": 5.819355547395467e-07, "loss": 0.1776, "step": 11201 }, { "epoch": 0.8942284665123333, "grad_norm": 0.2728029913144253, "learning_rate": 5.810667524850344e-07, "loss": 0.1212, "step": 11202 }, { "epoch": 0.8943082940847769, "grad_norm": 0.28157401669447935, "learning_rate": 5.80198579848933e-07, "loss": 0.1334, "step": 11203 }, { "epoch": 0.8943881216572204, "grad_norm": 0.2751422096474072, "learning_rate": 5.793310368892801e-07, "loss": 0.165, "step": 11204 }, { "epoch": 0.8944679492296639, "grad_norm": 0.35924121699016937, "learning_rate": 5.784641236640665e-07, "loss": 0.1755, "step": 11205 }, { "epoch": 0.8945477768021074, "grad_norm": 0.3035059203984483, "learning_rate": 5.775978402312399e-07, "loss": 0.1831, "step": 11206 }, { "epoch": 0.894627604374551, "grad_norm": 0.3392700588148289, "learning_rate": 5.767321866487085e-07, "loss": 0.1458, "step": 11207 }, { "epoch": 0.8947074319469945, "grad_norm": 0.30323279619969434, "learning_rate": 5.758671629743407e-07, "loss": 0.1744, "step": 11208 }, { "epoch": 0.894787259519438, "grad_norm": 0.30809232869163256, "learning_rate": 5.750027692659588e-07, "loss": 0.1729, "step": 11209 }, { "epoch": 0.8948670870918816, "grad_norm": 0.3118779680506181, "learning_rate": 5.741390055813401e-07, "loss": 0.18, "step": 11210 }, { "epoch": 0.8949469146643251, "grad_norm": 0.31736692120453963, "learning_rate": 5.732758719782272e-07, "loss": 0.1741, "step": 11211 }, { "epoch": 0.8950267422367686, "grad_norm": 0.31523682178703893, "learning_rate": 5.724133685143163e-07, "loss": 0.1682, "step": 11212 }, { "epoch": 0.8951065698092121, "grad_norm": 0.27377420402968056, "learning_rate": 5.715514952472656e-07, "loss": 0.1756, "step": 11213 }, { "epoch": 0.8951863973816556, "grad_norm": 0.2839772224530383, "learning_rate": 5.706902522346824e-07, "loss": 0.1403, "step": 11214 }, { "epoch": 0.8952662249540991, "grad_norm": 0.2779156006367114, "learning_rate": 5.698296395341408e-07, "loss": 0.1551, "step": 11215 }, { "epoch": 0.8953460525265426, "grad_norm": 0.3012021653455214, "learning_rate": 5.689696572031689e-07, "loss": 0.18, "step": 11216 }, { "epoch": 0.8954258800989862, "grad_norm": 0.26028019799582763, "learning_rate": 5.68110305299252e-07, "loss": 0.177, "step": 11217 }, { "epoch": 0.8955057076714297, "grad_norm": 0.26819967872373685, "learning_rate": 5.672515838798376e-07, "loss": 0.1725, "step": 11218 }, { "epoch": 0.8955855352438732, "grad_norm": 0.2991695183870207, "learning_rate": 5.66393493002323e-07, "loss": 0.137, "step": 11219 }, { "epoch": 0.8956653628163167, "grad_norm": 0.3079777628280037, "learning_rate": 5.655360327240711e-07, "loss": 0.1386, "step": 11220 }, { "epoch": 0.8957451903887603, "grad_norm": 0.2948192117623996, "learning_rate": 5.646792031023985e-07, "loss": 0.2039, "step": 11221 }, { "epoch": 0.8958250179612038, "grad_norm": 0.2862842602497632, "learning_rate": 5.638230041945847e-07, "loss": 0.1668, "step": 11222 }, { "epoch": 0.8959048455336474, "grad_norm": 0.3445052429995099, "learning_rate": 5.629674360578574e-07, "loss": 0.1623, "step": 11223 }, { "epoch": 0.8959846731060909, "grad_norm": 0.24680455915913335, "learning_rate": 5.62112498749412e-07, "loss": 0.1187, "step": 11224 }, { "epoch": 0.8960645006785344, "grad_norm": 0.27473006370786257, "learning_rate": 5.61258192326396e-07, "loss": 0.1453, "step": 11225 }, { "epoch": 0.8961443282509779, "grad_norm": 0.3013873916539421, "learning_rate": 5.60404516845916e-07, "loss": 0.1724, "step": 11226 }, { "epoch": 0.8962241558234214, "grad_norm": 0.29825424500174474, "learning_rate": 5.595514723650386e-07, "loss": 0.1644, "step": 11227 }, { "epoch": 0.8963039833958649, "grad_norm": 0.3298719709926346, "learning_rate": 5.586990589407837e-07, "loss": 0.1457, "step": 11228 }, { "epoch": 0.8963838109683084, "grad_norm": 0.30456307093533186, "learning_rate": 5.578472766301357e-07, "loss": 0.1763, "step": 11229 }, { "epoch": 0.896463638540752, "grad_norm": 0.3039705023341452, "learning_rate": 5.569961254900292e-07, "loss": 0.152, "step": 11230 }, { "epoch": 0.8965434661131955, "grad_norm": 0.31854592825818534, "learning_rate": 5.561456055773629e-07, "loss": 0.1536, "step": 11231 }, { "epoch": 0.896623293685639, "grad_norm": 0.2855121754898856, "learning_rate": 5.552957169489892e-07, "loss": 0.1461, "step": 11232 }, { "epoch": 0.8967031212580825, "grad_norm": 0.2903420057653219, "learning_rate": 5.544464596617194e-07, "loss": 0.1276, "step": 11233 }, { "epoch": 0.8967829488305261, "grad_norm": 0.3255853384853635, "learning_rate": 5.535978337723235e-07, "loss": 0.1929, "step": 11234 }, { "epoch": 0.8968627764029696, "grad_norm": 0.2700434559717163, "learning_rate": 5.527498393375307e-07, "loss": 0.1854, "step": 11235 }, { "epoch": 0.8969426039754131, "grad_norm": 0.27649948279173064, "learning_rate": 5.519024764140235e-07, "loss": 0.1445, "step": 11236 }, { "epoch": 0.8970224315478567, "grad_norm": 0.3265258027137, "learning_rate": 5.510557450584442e-07, "loss": 0.174, "step": 11237 }, { "epoch": 0.8971022591203002, "grad_norm": 0.24386899274523904, "learning_rate": 5.502096453273964e-07, "loss": 0.1214, "step": 11238 }, { "epoch": 0.8971820866927437, "grad_norm": 0.3415797359945673, "learning_rate": 5.493641772774372e-07, "loss": 0.1439, "step": 11239 }, { "epoch": 0.8972619142651872, "grad_norm": 0.3095219476119879, "learning_rate": 5.485193409650824e-07, "loss": 0.132, "step": 11240 }, { "epoch": 0.8973417418376307, "grad_norm": 0.24606458434571568, "learning_rate": 5.476751364468069e-07, "loss": 0.1296, "step": 11241 }, { "epoch": 0.8974215694100742, "grad_norm": 0.28821688499059167, "learning_rate": 5.468315637790389e-07, "loss": 0.1484, "step": 11242 }, { "epoch": 0.8975013969825177, "grad_norm": 0.2820023388612423, "learning_rate": 5.459886230181733e-07, "loss": 0.1319, "step": 11243 }, { "epoch": 0.8975812245549613, "grad_norm": 0.26871639121344, "learning_rate": 5.451463142205538e-07, "loss": 0.1496, "step": 11244 }, { "epoch": 0.8976610521274048, "grad_norm": 0.30248058460690913, "learning_rate": 5.443046374424865e-07, "loss": 0.1523, "step": 11245 }, { "epoch": 0.8977408796998483, "grad_norm": 0.26867367519593743, "learning_rate": 5.434635927402343e-07, "loss": 0.1582, "step": 11246 }, { "epoch": 0.8978207072722918, "grad_norm": 0.30349889084600035, "learning_rate": 5.426231801700166e-07, "loss": 0.1646, "step": 11247 }, { "epoch": 0.8979005348447354, "grad_norm": 0.2833776550160386, "learning_rate": 5.417833997880128e-07, "loss": 0.1299, "step": 11248 }, { "epoch": 0.8979803624171789, "grad_norm": 0.31152962580416493, "learning_rate": 5.409442516503594e-07, "loss": 0.1538, "step": 11249 }, { "epoch": 0.8980601899896224, "grad_norm": 0.2843065470986236, "learning_rate": 5.40105735813149e-07, "loss": 0.2009, "step": 11250 }, { "epoch": 0.898140017562066, "grad_norm": 0.274198157388106, "learning_rate": 5.392678523324312e-07, "loss": 0.1705, "step": 11251 }, { "epoch": 0.8982198451345095, "grad_norm": 0.25123757106623273, "learning_rate": 5.384306012642204e-07, "loss": 0.118, "step": 11252 }, { "epoch": 0.898299672706953, "grad_norm": 0.31223032937021156, "learning_rate": 5.375939826644794e-07, "loss": 0.1254, "step": 11253 }, { "epoch": 0.8983795002793965, "grad_norm": 0.28697212789149834, "learning_rate": 5.367579965891334e-07, "loss": 0.1588, "step": 11254 }, { "epoch": 0.89845932785184, "grad_norm": 0.253861385156812, "learning_rate": 5.359226430940667e-07, "loss": 0.167, "step": 11255 }, { "epoch": 0.8985391554242835, "grad_norm": 0.27051548034506323, "learning_rate": 5.350879222351158e-07, "loss": 0.1475, "step": 11256 }, { "epoch": 0.898618982996727, "grad_norm": 0.23899677069045402, "learning_rate": 5.342538340680836e-07, "loss": 0.1868, "step": 11257 }, { "epoch": 0.8986988105691706, "grad_norm": 0.27357783178398526, "learning_rate": 5.334203786487191e-07, "loss": 0.1647, "step": 11258 }, { "epoch": 0.8987786381416141, "grad_norm": 0.3090032400074252, "learning_rate": 5.32587556032741e-07, "loss": 0.1468, "step": 11259 }, { "epoch": 0.8988584657140576, "grad_norm": 0.3151333470920505, "learning_rate": 5.317553662758168e-07, "loss": 0.1477, "step": 11260 }, { "epoch": 0.8989382932865012, "grad_norm": 0.32099969135380274, "learning_rate": 5.3092380943358e-07, "loss": 0.1563, "step": 11261 }, { "epoch": 0.8990181208589447, "grad_norm": 0.30743065266461567, "learning_rate": 5.300928855616105e-07, "loss": 0.1728, "step": 11262 }, { "epoch": 0.8990979484313882, "grad_norm": 0.3277027441006071, "learning_rate": 5.292625947154551e-07, "loss": 0.1842, "step": 11263 }, { "epoch": 0.8991777760038318, "grad_norm": 0.30701444117414245, "learning_rate": 5.28432936950617e-07, "loss": 0.1706, "step": 11264 }, { "epoch": 0.8992576035762753, "grad_norm": 0.28489476695360394, "learning_rate": 5.276039123225518e-07, "loss": 0.1889, "step": 11265 }, { "epoch": 0.8993374311487188, "grad_norm": 0.268728748870794, "learning_rate": 5.26775520886682e-07, "loss": 0.1475, "step": 11266 }, { "epoch": 0.8994172587211623, "grad_norm": 0.3241972139443987, "learning_rate": 5.259477626983767e-07, "loss": 0.1519, "step": 11267 }, { "epoch": 0.8994970862936058, "grad_norm": 0.28475035241764296, "learning_rate": 5.251206378129714e-07, "loss": 0.1706, "step": 11268 }, { "epoch": 0.8995769138660493, "grad_norm": 0.3008915639488957, "learning_rate": 5.242941462857554e-07, "loss": 0.2223, "step": 11269 }, { "epoch": 0.8996567414384928, "grad_norm": 0.27264356703307413, "learning_rate": 5.234682881719766e-07, "loss": 0.1841, "step": 11270 }, { "epoch": 0.8997365690109363, "grad_norm": 0.26526436743333254, "learning_rate": 5.226430635268398e-07, "loss": 0.1633, "step": 11271 }, { "epoch": 0.8998163965833799, "grad_norm": 0.2866686465730812, "learning_rate": 5.218184724055075e-07, "loss": 0.1334, "step": 11272 }, { "epoch": 0.8998962241558234, "grad_norm": 0.27839243825878085, "learning_rate": 5.209945148631012e-07, "loss": 0.1737, "step": 11273 }, { "epoch": 0.8999760517282669, "grad_norm": 0.28635786521022616, "learning_rate": 5.201711909547003e-07, "loss": 0.1212, "step": 11274 }, { "epoch": 0.9000558793007105, "grad_norm": 0.3114858231351227, "learning_rate": 5.193485007353394e-07, "loss": 0.1477, "step": 11275 }, { "epoch": 0.900135706873154, "grad_norm": 0.2786959718361171, "learning_rate": 5.185264442600124e-07, "loss": 0.2558, "step": 11276 }, { "epoch": 0.9002155344455975, "grad_norm": 0.31056956678691716, "learning_rate": 5.177050215836687e-07, "loss": 0.1502, "step": 11277 }, { "epoch": 0.9002953620180411, "grad_norm": 0.2430858493217204, "learning_rate": 5.168842327612212e-07, "loss": 0.1389, "step": 11278 }, { "epoch": 0.9003751895904846, "grad_norm": 0.3234482351167943, "learning_rate": 5.160640778475334e-07, "loss": 0.1991, "step": 11279 }, { "epoch": 0.9004550171629281, "grad_norm": 0.2919683172813656, "learning_rate": 5.152445568974296e-07, "loss": 0.1637, "step": 11280 }, { "epoch": 0.9005348447353716, "grad_norm": 0.2874672270956473, "learning_rate": 5.144256699656913e-07, "loss": 0.1611, "step": 11281 }, { "epoch": 0.9006146723078151, "grad_norm": 0.28586614768671254, "learning_rate": 5.136074171070593e-07, "loss": 0.1455, "step": 11282 }, { "epoch": 0.9006944998802586, "grad_norm": 0.2968068366012944, "learning_rate": 5.127897983762297e-07, "loss": 0.1671, "step": 11283 }, { "epoch": 0.9007743274527021, "grad_norm": 0.25374820612559645, "learning_rate": 5.119728138278579e-07, "loss": 0.1804, "step": 11284 }, { "epoch": 0.9008541550251457, "grad_norm": 0.2952728343196513, "learning_rate": 5.111564635165545e-07, "loss": 0.2251, "step": 11285 }, { "epoch": 0.9009339825975892, "grad_norm": 0.2929269270090674, "learning_rate": 5.103407474968902e-07, "loss": 0.1967, "step": 11286 }, { "epoch": 0.9010138101700327, "grad_norm": 0.27952013248505636, "learning_rate": 5.095256658233927e-07, "loss": 0.1343, "step": 11287 }, { "epoch": 0.9010936377424763, "grad_norm": 0.28708737322373284, "learning_rate": 5.08711218550546e-07, "loss": 0.1859, "step": 11288 }, { "epoch": 0.9011734653149198, "grad_norm": 0.289996294157238, "learning_rate": 5.078974057327945e-07, "loss": 0.1502, "step": 11289 }, { "epoch": 0.9012532928873633, "grad_norm": 0.27848518170459474, "learning_rate": 5.070842274245358e-07, "loss": 0.1708, "step": 11290 }, { "epoch": 0.9013331204598068, "grad_norm": 0.3115823007490757, "learning_rate": 5.062716836801296e-07, "loss": 0.1646, "step": 11291 }, { "epoch": 0.9014129480322504, "grad_norm": 0.2311398673881412, "learning_rate": 5.054597745538914e-07, "loss": 0.1309, "step": 11292 }, { "epoch": 0.9014927756046939, "grad_norm": 0.3025147857285786, "learning_rate": 5.046485001000923e-07, "loss": 0.1347, "step": 11293 }, { "epoch": 0.9015726031771374, "grad_norm": 0.2740695736986143, "learning_rate": 5.038378603729643e-07, "loss": 0.1576, "step": 11294 }, { "epoch": 0.9016524307495809, "grad_norm": 0.29578536691692336, "learning_rate": 5.030278554266942e-07, "loss": 0.1562, "step": 11295 }, { "epoch": 0.9017322583220244, "grad_norm": 0.28641642365248354, "learning_rate": 5.0221848531543e-07, "loss": 0.1829, "step": 11296 }, { "epoch": 0.9018120858944679, "grad_norm": 0.31589945186753676, "learning_rate": 5.014097500932724e-07, "loss": 0.1622, "step": 11297 }, { "epoch": 0.9018919134669114, "grad_norm": 0.3169814598726699, "learning_rate": 5.006016498142829e-07, "loss": 0.1854, "step": 11298 }, { "epoch": 0.901971741039355, "grad_norm": 0.30553769552963533, "learning_rate": 4.997941845324805e-07, "loss": 0.1664, "step": 11299 }, { "epoch": 0.9020515686117985, "grad_norm": 0.2794637675419944, "learning_rate": 4.989873543018397e-07, "loss": 0.1564, "step": 11300 }, { "epoch": 0.902131396184242, "grad_norm": 0.3137556584761827, "learning_rate": 4.981811591762975e-07, "loss": 0.1692, "step": 11301 }, { "epoch": 0.9022112237566856, "grad_norm": 0.27481256094309287, "learning_rate": 4.973755992097385e-07, "loss": 0.1748, "step": 11302 }, { "epoch": 0.9022910513291291, "grad_norm": 0.3269151617273566, "learning_rate": 4.965706744560161e-07, "loss": 0.1912, "step": 11303 }, { "epoch": 0.9023708789015726, "grad_norm": 0.30287414744830055, "learning_rate": 4.957663849689342e-07, "loss": 0.1387, "step": 11304 }, { "epoch": 0.9024507064740162, "grad_norm": 0.2269970019042203, "learning_rate": 4.949627308022598e-07, "loss": 0.1354, "step": 11305 }, { "epoch": 0.9025305340464597, "grad_norm": 0.27885482623524865, "learning_rate": 4.941597120097108e-07, "loss": 0.1316, "step": 11306 }, { "epoch": 0.9026103616189032, "grad_norm": 0.28074252617933737, "learning_rate": 4.933573286449633e-07, "loss": 0.1506, "step": 11307 }, { "epoch": 0.9026901891913467, "grad_norm": 0.2725075692050066, "learning_rate": 4.9255558076166e-07, "loss": 0.159, "step": 11308 }, { "epoch": 0.9027700167637902, "grad_norm": 0.28220470324627733, "learning_rate": 4.91754468413388e-07, "loss": 0.2002, "step": 11309 }, { "epoch": 0.9028498443362337, "grad_norm": 0.34547337545655593, "learning_rate": 4.909539916537054e-07, "loss": 0.1321, "step": 11310 }, { "epoch": 0.9029296719086772, "grad_norm": 0.28078693930943815, "learning_rate": 4.90154150536114e-07, "loss": 0.1416, "step": 11311 }, { "epoch": 0.9030094994811207, "grad_norm": 0.30367124808829526, "learning_rate": 4.893549451140844e-07, "loss": 0.1507, "step": 11312 }, { "epoch": 0.9030893270535643, "grad_norm": 0.27652261053607796, "learning_rate": 4.885563754410394e-07, "loss": 0.118, "step": 11313 }, { "epoch": 0.9031691546260078, "grad_norm": 0.28387075832558895, "learning_rate": 4.877584415703595e-07, "loss": 0.1836, "step": 11314 }, { "epoch": 0.9032489821984514, "grad_norm": 0.29921492344314504, "learning_rate": 4.869611435553845e-07, "loss": 0.1602, "step": 11315 }, { "epoch": 0.9033288097708949, "grad_norm": 0.28871872839713647, "learning_rate": 4.861644814494082e-07, "loss": 0.1672, "step": 11316 }, { "epoch": 0.9034086373433384, "grad_norm": 0.28727246608441637, "learning_rate": 4.853684553056881e-07, "loss": 0.1741, "step": 11317 }, { "epoch": 0.9034884649157819, "grad_norm": 0.30021728727452823, "learning_rate": 4.845730651774328e-07, "loss": 0.1721, "step": 11318 }, { "epoch": 0.9035682924882255, "grad_norm": 0.3110413705512572, "learning_rate": 4.837783111178107e-07, "loss": 0.1695, "step": 11319 }, { "epoch": 0.903648120060669, "grad_norm": 0.28998775167668933, "learning_rate": 4.829841931799484e-07, "loss": 0.1348, "step": 11320 }, { "epoch": 0.9037279476331125, "grad_norm": 0.2859762446766777, "learning_rate": 4.821907114169311e-07, "loss": 0.1468, "step": 11321 }, { "epoch": 0.903807775205556, "grad_norm": 0.2914439830258406, "learning_rate": 4.813978658817986e-07, "loss": 0.1281, "step": 11322 }, { "epoch": 0.9038876027779995, "grad_norm": 0.29535303534735696, "learning_rate": 4.806056566275496e-07, "loss": 0.116, "step": 11323 }, { "epoch": 0.903967430350443, "grad_norm": 0.2636148286837445, "learning_rate": 4.798140837071408e-07, "loss": 0.1451, "step": 11324 }, { "epoch": 0.9040472579228865, "grad_norm": 0.26083396846346374, "learning_rate": 4.790231471734829e-07, "loss": 0.1453, "step": 11325 }, { "epoch": 0.90412708549533, "grad_norm": 0.353914882168168, "learning_rate": 4.782328470794506e-07, "loss": 0.1624, "step": 11326 }, { "epoch": 0.9042069130677736, "grad_norm": 0.2827428329657204, "learning_rate": 4.774431834778714e-07, "loss": 0.1291, "step": 11327 }, { "epoch": 0.9042867406402171, "grad_norm": 0.2971333497934428, "learning_rate": 4.76654156421531e-07, "loss": 0.1571, "step": 11328 }, { "epoch": 0.9043665682126607, "grad_norm": 0.2684254997525328, "learning_rate": 4.7586576596317157e-07, "loss": 0.1349, "step": 11329 }, { "epoch": 0.9044463957851042, "grad_norm": 0.28093508057769656, "learning_rate": 4.750780121554932e-07, "loss": 0.1387, "step": 11330 }, { "epoch": 0.9045262233575477, "grad_norm": 0.2795480731038447, "learning_rate": 4.742908950511571e-07, "loss": 0.147, "step": 11331 }, { "epoch": 0.9046060509299912, "grad_norm": 0.26941518218762983, "learning_rate": 4.7350441470277675e-07, "loss": 0.1319, "step": 11332 }, { "epoch": 0.9046858785024348, "grad_norm": 0.2591001407648474, "learning_rate": 4.7271857116292675e-07, "loss": 0.1585, "step": 11333 }, { "epoch": 0.9047657060748783, "grad_norm": 0.28973542666635727, "learning_rate": 4.7193336448413507e-07, "loss": 0.1301, "step": 11334 }, { "epoch": 0.9048455336473218, "grad_norm": 0.2424083273708649, "learning_rate": 4.7114879471889306e-07, "loss": 0.1211, "step": 11335 }, { "epoch": 0.9049253612197653, "grad_norm": 0.27992083426959713, "learning_rate": 4.7036486191964436e-07, "loss": 0.1616, "step": 11336 }, { "epoch": 0.9050051887922088, "grad_norm": 0.29010233543420283, "learning_rate": 4.6958156613879145e-07, "loss": 0.1811, "step": 11337 }, { "epoch": 0.9050850163646523, "grad_norm": 0.29162791390972487, "learning_rate": 4.6879890742869584e-07, "loss": 0.1754, "step": 11338 }, { "epoch": 0.9051648439370958, "grad_norm": 0.26243424718094205, "learning_rate": 4.6801688584167224e-07, "loss": 0.1794, "step": 11339 }, { "epoch": 0.9052446715095394, "grad_norm": 0.2868762863127122, "learning_rate": 4.6723550142999895e-07, "loss": 0.1391, "step": 11340 }, { "epoch": 0.9053244990819829, "grad_norm": 0.2842807537383867, "learning_rate": 4.6645475424590747e-07, "loss": 0.1472, "step": 11341 }, { "epoch": 0.9054043266544264, "grad_norm": 0.2918734396932075, "learning_rate": 4.6567464434158824e-07, "loss": 0.1563, "step": 11342 }, { "epoch": 0.90548415422687, "grad_norm": 0.28053546731031154, "learning_rate": 4.648951717691863e-07, "loss": 0.1906, "step": 11343 }, { "epoch": 0.9055639817993135, "grad_norm": 0.3081298534896428, "learning_rate": 4.641163365808066e-07, "loss": 0.173, "step": 11344 }, { "epoch": 0.905643809371757, "grad_norm": 0.2637675594537708, "learning_rate": 4.633381388285152e-07, "loss": 0.1736, "step": 11345 }, { "epoch": 0.9057236369442005, "grad_norm": 0.3140300053183292, "learning_rate": 4.6256057856432613e-07, "loss": 0.1714, "step": 11346 }, { "epoch": 0.9058034645166441, "grad_norm": 0.24529170003181947, "learning_rate": 4.6178365584022e-07, "loss": 0.1293, "step": 11347 }, { "epoch": 0.9058832920890876, "grad_norm": 0.28007894896615837, "learning_rate": 4.610073707081286e-07, "loss": 0.1588, "step": 11348 }, { "epoch": 0.9059631196615311, "grad_norm": 0.27639109410228546, "learning_rate": 4.6023172321994713e-07, "loss": 0.2107, "step": 11349 }, { "epoch": 0.9060429472339746, "grad_norm": 0.26930665536264786, "learning_rate": 4.594567134275207e-07, "loss": 0.1777, "step": 11350 }, { "epoch": 0.9061227748064181, "grad_norm": 0.2802444083738377, "learning_rate": 4.5868234138265576e-07, "loss": 0.1947, "step": 11351 }, { "epoch": 0.9062026023788616, "grad_norm": 0.3044382191247872, "learning_rate": 4.579086071371186e-07, "loss": 0.1591, "step": 11352 }, { "epoch": 0.9062824299513051, "grad_norm": 0.26741107822496496, "learning_rate": 4.57135510742629e-07, "loss": 0.162, "step": 11353 }, { "epoch": 0.9063622575237487, "grad_norm": 0.31188180272488464, "learning_rate": 4.563630522508644e-07, "loss": 0.1579, "step": 11354 }, { "epoch": 0.9064420850961922, "grad_norm": 0.33026161292318135, "learning_rate": 4.5559123171346145e-07, "loss": 0.2028, "step": 11355 }, { "epoch": 0.9065219126686358, "grad_norm": 0.27327752361772306, "learning_rate": 4.5482004918201426e-07, "loss": 0.1284, "step": 11356 }, { "epoch": 0.9066017402410793, "grad_norm": 0.2683162793243554, "learning_rate": 4.5404950470807176e-07, "loss": 0.1648, "step": 11357 }, { "epoch": 0.9066815678135228, "grad_norm": 0.286760788016044, "learning_rate": 4.532795983431437e-07, "loss": 0.2273, "step": 11358 }, { "epoch": 0.9067613953859663, "grad_norm": 0.3112362688231769, "learning_rate": 4.525103301386935e-07, "loss": 0.1687, "step": 11359 }, { "epoch": 0.9068412229584099, "grad_norm": 0.26573521771897307, "learning_rate": 4.5174170014614327e-07, "loss": 0.1763, "step": 11360 }, { "epoch": 0.9069210505308534, "grad_norm": 0.24267440163239723, "learning_rate": 4.509737084168753e-07, "loss": 0.1093, "step": 11361 }, { "epoch": 0.9070008781032969, "grad_norm": 0.22851693392834818, "learning_rate": 4.5020635500222507e-07, "loss": 0.17, "step": 11362 }, { "epoch": 0.9070807056757404, "grad_norm": 0.2750593643401046, "learning_rate": 4.4943963995348836e-07, "loss": 0.1615, "step": 11363 }, { "epoch": 0.9071605332481839, "grad_norm": 0.31319313970332346, "learning_rate": 4.4867356332191303e-07, "loss": 0.1453, "step": 11364 }, { "epoch": 0.9072403608206274, "grad_norm": 0.3189425635682674, "learning_rate": 4.479081251587136e-07, "loss": 0.1455, "step": 11365 }, { "epoch": 0.9073201883930709, "grad_norm": 0.3066433556561748, "learning_rate": 4.471433255150548e-07, "loss": 0.1507, "step": 11366 }, { "epoch": 0.9074000159655145, "grad_norm": 0.27587305786814936, "learning_rate": 4.463791644420601e-07, "loss": 0.1379, "step": 11367 }, { "epoch": 0.907479843537958, "grad_norm": 0.2826560655958637, "learning_rate": 4.4561564199080976e-07, "loss": 0.1312, "step": 11368 }, { "epoch": 0.9075596711104015, "grad_norm": 0.28663633568868613, "learning_rate": 4.4485275821234186e-07, "loss": 0.1802, "step": 11369 }, { "epoch": 0.9076394986828451, "grad_norm": 0.26719399637811897, "learning_rate": 4.440905131576545e-07, "loss": 0.1484, "step": 11370 }, { "epoch": 0.9077193262552886, "grad_norm": 0.2552443559375045, "learning_rate": 4.4332890687770024e-07, "loss": 0.1677, "step": 11371 }, { "epoch": 0.9077991538277321, "grad_norm": 0.30136048903358437, "learning_rate": 4.4256793942338837e-07, "loss": 0.1397, "step": 11372 }, { "epoch": 0.9078789814001756, "grad_norm": 0.2638540458118836, "learning_rate": 4.418076108455871e-07, "loss": 0.1592, "step": 11373 }, { "epoch": 0.9079588089726192, "grad_norm": 0.269803825235446, "learning_rate": 4.410479211951191e-07, "loss": 0.1997, "step": 11374 }, { "epoch": 0.9080386365450627, "grad_norm": 0.30738529571804174, "learning_rate": 4.402888705227715e-07, "loss": 0.1934, "step": 11375 }, { "epoch": 0.9081184641175062, "grad_norm": 0.28763576227795024, "learning_rate": 4.3953045887927927e-07, "loss": 0.1314, "step": 11376 }, { "epoch": 0.9081982916899497, "grad_norm": 0.28268653831487645, "learning_rate": 4.3877268631534296e-07, "loss": 0.222, "step": 11377 }, { "epoch": 0.9082781192623932, "grad_norm": 0.28143634354696895, "learning_rate": 4.3801555288161213e-07, "loss": 0.1864, "step": 11378 }, { "epoch": 0.9083579468348367, "grad_norm": 0.2979068037107129, "learning_rate": 4.3725905862870286e-07, "loss": 0.1648, "step": 11379 }, { "epoch": 0.9084377744072802, "grad_norm": 0.31992750400927406, "learning_rate": 4.365032036071826e-07, "loss": 0.2186, "step": 11380 }, { "epoch": 0.9085176019797238, "grad_norm": 0.29257426800747244, "learning_rate": 4.357479878675741e-07, "loss": 0.139, "step": 11381 }, { "epoch": 0.9085974295521673, "grad_norm": 0.2823724207874613, "learning_rate": 4.349934114603638e-07, "loss": 0.1388, "step": 11382 }, { "epoch": 0.9086772571246109, "grad_norm": 0.2823347195165688, "learning_rate": 4.3423947443599015e-07, "loss": 0.1566, "step": 11383 }, { "epoch": 0.9087570846970544, "grad_norm": 0.24477610901477836, "learning_rate": 4.3348617684485284e-07, "loss": 0.1831, "step": 11384 }, { "epoch": 0.9088369122694979, "grad_norm": 0.27351003189931666, "learning_rate": 4.3273351873730606e-07, "loss": 0.1346, "step": 11385 }, { "epoch": 0.9089167398419414, "grad_norm": 0.2832713570179348, "learning_rate": 4.319815001636618e-07, "loss": 0.1311, "step": 11386 }, { "epoch": 0.908996567414385, "grad_norm": 0.2934287974119607, "learning_rate": 4.312301211741876e-07, "loss": 0.1425, "step": 11387 }, { "epoch": 0.9090763949868285, "grad_norm": 0.28638367189000197, "learning_rate": 4.3047938181911443e-07, "loss": 0.1784, "step": 11388 }, { "epoch": 0.909156222559272, "grad_norm": 0.2554562158662466, "learning_rate": 4.297292821486254e-07, "loss": 0.1183, "step": 11389 }, { "epoch": 0.9092360501317155, "grad_norm": 0.2808490666971683, "learning_rate": 4.2897982221285717e-07, "loss": 0.1978, "step": 11390 }, { "epoch": 0.909315877704159, "grad_norm": 0.2836445596654909, "learning_rate": 4.282310020619129e-07, "loss": 0.1025, "step": 11391 }, { "epoch": 0.9093957052766025, "grad_norm": 0.3135011021969349, "learning_rate": 4.274828217458449e-07, "loss": 0.1358, "step": 11392 }, { "epoch": 0.909475532849046, "grad_norm": 0.33468906514688757, "learning_rate": 4.2673528131466967e-07, "loss": 0.1537, "step": 11393 }, { "epoch": 0.9095553604214895, "grad_norm": 0.3135858333113268, "learning_rate": 4.25988380818354e-07, "loss": 0.1673, "step": 11394 }, { "epoch": 0.9096351879939331, "grad_norm": 0.2839136844553321, "learning_rate": 4.2524212030682797e-07, "loss": 0.153, "step": 11395 }, { "epoch": 0.9097150155663766, "grad_norm": 0.28541337905711234, "learning_rate": 4.244964998299739e-07, "loss": 0.1294, "step": 11396 }, { "epoch": 0.9097948431388202, "grad_norm": 0.3513965581052299, "learning_rate": 4.2375151943763516e-07, "loss": 0.184, "step": 11397 }, { "epoch": 0.9098746707112637, "grad_norm": 0.25993324545165974, "learning_rate": 4.2300717917960975e-07, "loss": 0.1554, "step": 11398 }, { "epoch": 0.9099544982837072, "grad_norm": 0.29559595905545344, "learning_rate": 4.222634791056535e-07, "loss": 0.1561, "step": 11399 }, { "epoch": 0.9100343258561507, "grad_norm": 0.28372778407800714, "learning_rate": 4.2152041926548206e-07, "loss": 0.1485, "step": 11400 }, { "epoch": 0.9101141534285943, "grad_norm": 0.25014567467471405, "learning_rate": 4.207779997087624e-07, "loss": 0.1355, "step": 11401 }, { "epoch": 0.9101939810010378, "grad_norm": 0.24954174175315724, "learning_rate": 4.2003622048512716e-07, "loss": 0.1743, "step": 11402 }, { "epoch": 0.9102738085734813, "grad_norm": 0.27027972703062486, "learning_rate": 4.1929508164415765e-07, "loss": 0.1564, "step": 11403 }, { "epoch": 0.9103536361459248, "grad_norm": 0.268488680187218, "learning_rate": 4.1855458323539543e-07, "loss": 0.1602, "step": 11404 }, { "epoch": 0.9104334637183683, "grad_norm": 0.27350615506631887, "learning_rate": 4.178147253083431e-07, "loss": 0.1772, "step": 11405 }, { "epoch": 0.9105132912908118, "grad_norm": 0.2878917339757798, "learning_rate": 4.170755079124544e-07, "loss": 0.1522, "step": 11406 }, { "epoch": 0.9105931188632553, "grad_norm": 0.2835077912225756, "learning_rate": 4.1633693109714543e-07, "loss": 0.1938, "step": 11407 }, { "epoch": 0.9106729464356988, "grad_norm": 0.3097171809053831, "learning_rate": 4.155989949117845e-07, "loss": 0.1381, "step": 11408 }, { "epoch": 0.9107527740081424, "grad_norm": 0.27039032612770925, "learning_rate": 4.1486169940570307e-07, "loss": 0.1483, "step": 11409 }, { "epoch": 0.910832601580586, "grad_norm": 0.2912878034673464, "learning_rate": 4.1412504462818413e-07, "loss": 0.1691, "step": 11410 }, { "epoch": 0.9109124291530295, "grad_norm": 0.26110671756961645, "learning_rate": 4.1338903062847044e-07, "loss": 0.1965, "step": 11411 }, { "epoch": 0.910992256725473, "grad_norm": 0.26414053263987614, "learning_rate": 4.126536574557627e-07, "loss": 0.1381, "step": 11412 }, { "epoch": 0.9110720842979165, "grad_norm": 0.26107075171432503, "learning_rate": 4.1191892515921484e-07, "loss": 0.1597, "step": 11413 }, { "epoch": 0.91115191187036, "grad_norm": 0.3092858335216538, "learning_rate": 4.1118483378794315e-07, "loss": 0.1732, "step": 11414 }, { "epoch": 0.9112317394428036, "grad_norm": 0.24240825373064456, "learning_rate": 4.1045138339101954e-07, "loss": 0.1749, "step": 11415 }, { "epoch": 0.9113115670152471, "grad_norm": 0.27758812453354464, "learning_rate": 4.097185740174703e-07, "loss": 0.1758, "step": 11416 }, { "epoch": 0.9113913945876906, "grad_norm": 0.29213864782114013, "learning_rate": 4.089864057162818e-07, "loss": 0.1572, "step": 11417 }, { "epoch": 0.9114712221601341, "grad_norm": 0.28822159888216115, "learning_rate": 4.08254878536396e-07, "loss": 0.1524, "step": 11418 }, { "epoch": 0.9115510497325776, "grad_norm": 0.2976154280233449, "learning_rate": 4.075239925267127e-07, "loss": 0.199, "step": 11419 }, { "epoch": 0.9116308773050211, "grad_norm": 0.2542925972956009, "learning_rate": 4.067937477360906e-07, "loss": 0.1146, "step": 11420 }, { "epoch": 0.9117107048774646, "grad_norm": 0.29154566757089057, "learning_rate": 4.060641442133406e-07, "loss": 0.1753, "step": 11421 }, { "epoch": 0.9117905324499082, "grad_norm": 0.2728718522762911, "learning_rate": 4.053351820072349e-07, "loss": 0.1666, "step": 11422 }, { "epoch": 0.9118703600223517, "grad_norm": 0.2873585761571903, "learning_rate": 4.046068611665033e-07, "loss": 0.1613, "step": 11423 }, { "epoch": 0.9119501875947953, "grad_norm": 0.2813901525848609, "learning_rate": 4.038791817398302e-07, "loss": 0.1966, "step": 11424 }, { "epoch": 0.9120300151672388, "grad_norm": 0.273490437458663, "learning_rate": 4.031521437758579e-07, "loss": 0.1403, "step": 11425 }, { "epoch": 0.9121098427396823, "grad_norm": 0.27673728923685165, "learning_rate": 4.024257473231863e-07, "loss": 0.1503, "step": 11426 }, { "epoch": 0.9121896703121258, "grad_norm": 0.2776416801807545, "learning_rate": 4.0169999243036995e-07, "loss": 0.1496, "step": 11427 }, { "epoch": 0.9122694978845693, "grad_norm": 0.2831106468149029, "learning_rate": 4.009748791459267e-07, "loss": 0.1161, "step": 11428 }, { "epoch": 0.9123493254570129, "grad_norm": 0.26620670482343267, "learning_rate": 4.0025040751832333e-07, "loss": 0.1762, "step": 11429 }, { "epoch": 0.9124291530294564, "grad_norm": 0.2502980340903359, "learning_rate": 3.995265775959911e-07, "loss": 0.1332, "step": 11430 }, { "epoch": 0.9125089806018999, "grad_norm": 0.29515793013441377, "learning_rate": 3.9880338942731357e-07, "loss": 0.1119, "step": 11431 }, { "epoch": 0.9125888081743434, "grad_norm": 0.2770446142269445, "learning_rate": 3.9808084306063423e-07, "loss": 0.1516, "step": 11432 }, { "epoch": 0.9126686357467869, "grad_norm": 0.28987433815531716, "learning_rate": 3.9735893854425225e-07, "loss": 0.1343, "step": 11433 }, { "epoch": 0.9127484633192304, "grad_norm": 0.24453670674160793, "learning_rate": 3.9663767592642235e-07, "loss": 0.1264, "step": 11434 }, { "epoch": 0.9128282908916739, "grad_norm": 0.24663124768843733, "learning_rate": 3.959170552553593e-07, "loss": 0.1837, "step": 11435 }, { "epoch": 0.9129081184641175, "grad_norm": 0.2728596524328623, "learning_rate": 3.9519707657923234e-07, "loss": 0.155, "step": 11436 }, { "epoch": 0.9129879460365611, "grad_norm": 0.271663622618847, "learning_rate": 3.944777399461741e-07, "loss": 0.1633, "step": 11437 }, { "epoch": 0.9130677736090046, "grad_norm": 0.28999339812910857, "learning_rate": 3.937590454042628e-07, "loss": 0.1907, "step": 11438 }, { "epoch": 0.9131476011814481, "grad_norm": 0.2998505398489885, "learning_rate": 3.9304099300154444e-07, "loss": 0.1507, "step": 11439 }, { "epoch": 0.9132274287538916, "grad_norm": 0.3199999026609779, "learning_rate": 3.923235827860172e-07, "loss": 0.1549, "step": 11440 }, { "epoch": 0.9133072563263351, "grad_norm": 0.2832094377250853, "learning_rate": 3.916068148056362e-07, "loss": 0.1455, "step": 11441 }, { "epoch": 0.9133870838987787, "grad_norm": 0.2910964332101455, "learning_rate": 3.908906891083164e-07, "loss": 0.1406, "step": 11442 }, { "epoch": 0.9134669114712222, "grad_norm": 0.2579357627197325, "learning_rate": 3.9017520574192504e-07, "loss": 0.1853, "step": 11443 }, { "epoch": 0.9135467390436657, "grad_norm": 0.24391943004093009, "learning_rate": 3.894603647542916e-07, "loss": 0.1742, "step": 11444 }, { "epoch": 0.9136265666161092, "grad_norm": 0.2572349453080079, "learning_rate": 3.8874616619320017e-07, "loss": 0.1456, "step": 11445 }, { "epoch": 0.9137063941885527, "grad_norm": 0.30036952861028954, "learning_rate": 3.8803261010639137e-07, "loss": 0.1446, "step": 11446 }, { "epoch": 0.9137862217609962, "grad_norm": 0.2963985484499525, "learning_rate": 3.873196965415649e-07, "loss": 0.1638, "step": 11447 }, { "epoch": 0.9138660493334397, "grad_norm": 0.2292338229709998, "learning_rate": 3.8660742554637365e-07, "loss": 0.1923, "step": 11448 }, { "epoch": 0.9139458769058832, "grad_norm": 0.2956411978254178, "learning_rate": 3.8589579716843296e-07, "loss": 0.181, "step": 11449 }, { "epoch": 0.9140257044783268, "grad_norm": 0.3128416823483886, "learning_rate": 3.8518481145531027e-07, "loss": 0.162, "step": 11450 }, { "epoch": 0.9141055320507704, "grad_norm": 0.25653797188758254, "learning_rate": 3.844744684545343e-07, "loss": 0.15, "step": 11451 }, { "epoch": 0.9141853596232139, "grad_norm": 0.27630490220892717, "learning_rate": 3.837647682135859e-07, "loss": 0.1661, "step": 11452 }, { "epoch": 0.9142651871956574, "grad_norm": 0.294395524994624, "learning_rate": 3.830557107799082e-07, "loss": 0.1809, "step": 11453 }, { "epoch": 0.9143450147681009, "grad_norm": 0.35590112436126786, "learning_rate": 3.823472962008978e-07, "loss": 0.1637, "step": 11454 }, { "epoch": 0.9144248423405444, "grad_norm": 0.2645741829208625, "learning_rate": 3.816395245239102e-07, "loss": 0.1673, "step": 11455 }, { "epoch": 0.914504669912988, "grad_norm": 0.25258149076587344, "learning_rate": 3.809323957962563e-07, "loss": 0.1711, "step": 11456 }, { "epoch": 0.9145844974854315, "grad_norm": 0.25187501759691394, "learning_rate": 3.802259100652028e-07, "loss": 0.1403, "step": 11457 }, { "epoch": 0.914664325057875, "grad_norm": 0.27077656450644877, "learning_rate": 3.7952006737797975e-07, "loss": 0.1836, "step": 11458 }, { "epoch": 0.9147441526303185, "grad_norm": 0.2868886431403618, "learning_rate": 3.788148677817682e-07, "loss": 0.1756, "step": 11459 }, { "epoch": 0.914823980202762, "grad_norm": 0.3177340493467777, "learning_rate": 3.7811031132370725e-07, "loss": 0.1613, "step": 11460 }, { "epoch": 0.9149038077752055, "grad_norm": 0.2930686115383715, "learning_rate": 3.774063980508924e-07, "loss": 0.1806, "step": 11461 }, { "epoch": 0.914983635347649, "grad_norm": 0.2647429790691015, "learning_rate": 3.7670312801038166e-07, "loss": 0.2228, "step": 11462 }, { "epoch": 0.9150634629200926, "grad_norm": 0.35748945590042325, "learning_rate": 3.7600050124918184e-07, "loss": 0.1452, "step": 11463 }, { "epoch": 0.9151432904925362, "grad_norm": 0.29507327128946964, "learning_rate": 3.752985178142632e-07, "loss": 0.1776, "step": 11464 }, { "epoch": 0.9152231180649797, "grad_norm": 0.30525026528085486, "learning_rate": 3.7459717775254924e-07, "loss": 0.178, "step": 11465 }, { "epoch": 0.9153029456374232, "grad_norm": 0.30013635354950735, "learning_rate": 3.7389648111092137e-07, "loss": 0.1226, "step": 11466 }, { "epoch": 0.9153827732098667, "grad_norm": 0.2415625121244466, "learning_rate": 3.73196427936221e-07, "loss": 0.1316, "step": 11467 }, { "epoch": 0.9154626007823102, "grad_norm": 0.2696629783710266, "learning_rate": 3.724970182752408e-07, "loss": 0.1538, "step": 11468 }, { "epoch": 0.9155424283547537, "grad_norm": 0.2724457663332687, "learning_rate": 3.717982521747354e-07, "loss": 0.123, "step": 11469 }, { "epoch": 0.9156222559271973, "grad_norm": 0.31091063888538095, "learning_rate": 3.711001296814143e-07, "loss": 0.2068, "step": 11470 }, { "epoch": 0.9157020834996408, "grad_norm": 0.32880049005262524, "learning_rate": 3.704026508419423e-07, "loss": 0.2248, "step": 11471 }, { "epoch": 0.9157819110720843, "grad_norm": 0.24778177646580693, "learning_rate": 3.697058157029465e-07, "loss": 0.1431, "step": 11472 }, { "epoch": 0.9158617386445278, "grad_norm": 0.254684948465243, "learning_rate": 3.690096243110042e-07, "loss": 0.1897, "step": 11473 }, { "epoch": 0.9159415662169713, "grad_norm": 0.24838950993501518, "learning_rate": 3.6831407671265584e-07, "loss": 0.155, "step": 11474 }, { "epoch": 0.9160213937894148, "grad_norm": 0.26543960223021335, "learning_rate": 3.676191729543932e-07, "loss": 0.1944, "step": 11475 }, { "epoch": 0.9161012213618583, "grad_norm": 0.27499004974907104, "learning_rate": 3.669249130826713e-07, "loss": 0.1605, "step": 11476 }, { "epoch": 0.9161810489343019, "grad_norm": 0.321328121268087, "learning_rate": 3.662312971438975e-07, "loss": 0.173, "step": 11477 }, { "epoch": 0.9162608765067455, "grad_norm": 0.28338381331117346, "learning_rate": 3.655383251844336e-07, "loss": 0.1797, "step": 11478 }, { "epoch": 0.916340704079189, "grad_norm": 0.299045879998159, "learning_rate": 3.648459972506069e-07, "loss": 0.1732, "step": 11479 }, { "epoch": 0.9164205316516325, "grad_norm": 0.26178261074216946, "learning_rate": 3.641543133886938e-07, "loss": 0.147, "step": 11480 }, { "epoch": 0.916500359224076, "grad_norm": 0.33541027655687383, "learning_rate": 3.6346327364493397e-07, "loss": 0.1269, "step": 11481 }, { "epoch": 0.9165801867965195, "grad_norm": 0.26215426651931717, "learning_rate": 3.6277287806551596e-07, "loss": 0.131, "step": 11482 }, { "epoch": 0.916660014368963, "grad_norm": 0.2644222514846811, "learning_rate": 3.6208312669659405e-07, "loss": 0.143, "step": 11483 }, { "epoch": 0.9167398419414066, "grad_norm": 0.256000645822836, "learning_rate": 3.6139401958427354e-07, "loss": 0.1275, "step": 11484 }, { "epoch": 0.9168196695138501, "grad_norm": 0.29984981619479517, "learning_rate": 3.607055567746187e-07, "loss": 0.134, "step": 11485 }, { "epoch": 0.9168994970862936, "grad_norm": 0.2962492951246031, "learning_rate": 3.600177383136516e-07, "loss": 0.1668, "step": 11486 }, { "epoch": 0.9169793246587371, "grad_norm": 0.2899225920550812, "learning_rate": 3.593305642473466e-07, "loss": 0.1949, "step": 11487 }, { "epoch": 0.9170591522311806, "grad_norm": 0.2969169223312724, "learning_rate": 3.586440346216435e-07, "loss": 0.1727, "step": 11488 }, { "epoch": 0.9171389798036241, "grad_norm": 0.34139155774427243, "learning_rate": 3.5795814948243134e-07, "loss": 0.1818, "step": 11489 }, { "epoch": 0.9172188073760676, "grad_norm": 0.2917794797994389, "learning_rate": 3.572729088755589e-07, "loss": 0.1358, "step": 11490 }, { "epoch": 0.9172986349485113, "grad_norm": 0.26871332502847234, "learning_rate": 3.565883128468306e-07, "loss": 0.1757, "step": 11491 }, { "epoch": 0.9173784625209548, "grad_norm": 0.3363782852304388, "learning_rate": 3.559043614420121e-07, "loss": 0.1636, "step": 11492 }, { "epoch": 0.9174582900933983, "grad_norm": 0.26794205794351883, "learning_rate": 3.5522105470682246e-07, "loss": 0.1427, "step": 11493 }, { "epoch": 0.9175381176658418, "grad_norm": 0.321993747827323, "learning_rate": 3.54538392686935e-07, "loss": 0.1539, "step": 11494 }, { "epoch": 0.9176179452382853, "grad_norm": 0.30896230658793505, "learning_rate": 3.538563754279867e-07, "loss": 0.1849, "step": 11495 }, { "epoch": 0.9176977728107288, "grad_norm": 0.3352081246542836, "learning_rate": 3.531750029755632e-07, "loss": 0.2084, "step": 11496 }, { "epoch": 0.9177776003831724, "grad_norm": 0.2442454113018687, "learning_rate": 3.5249427537521585e-07, "loss": 0.1587, "step": 11497 }, { "epoch": 0.9178574279556159, "grad_norm": 0.3152985295336149, "learning_rate": 3.518141926724472e-07, "loss": 0.135, "step": 11498 }, { "epoch": 0.9179372555280594, "grad_norm": 0.31135551850697507, "learning_rate": 3.5113475491271753e-07, "loss": 0.1966, "step": 11499 }, { "epoch": 0.9180170831005029, "grad_norm": 0.2532988916545659, "learning_rate": 3.5045596214144497e-07, "loss": 0.1507, "step": 11500 }, { "epoch": 0.9180969106729464, "grad_norm": 0.32829150891150866, "learning_rate": 3.4977781440400206e-07, "loss": 0.1721, "step": 11501 }, { "epoch": 0.9181767382453899, "grad_norm": 0.29496881741072367, "learning_rate": 3.491003117457248e-07, "loss": 0.132, "step": 11502 }, { "epoch": 0.9182565658178334, "grad_norm": 0.26005794282787004, "learning_rate": 3.48423454211898e-07, "loss": 0.1634, "step": 11503 }, { "epoch": 0.918336393390277, "grad_norm": 0.30773376352323845, "learning_rate": 3.47747241847769e-07, "loss": 0.175, "step": 11504 }, { "epoch": 0.9184162209627206, "grad_norm": 0.29844793683460563, "learning_rate": 3.47071674698537e-07, "loss": 0.1405, "step": 11505 }, { "epoch": 0.9184960485351641, "grad_norm": 0.2837975958764012, "learning_rate": 3.463967528093637e-07, "loss": 0.1335, "step": 11506 }, { "epoch": 0.9185758761076076, "grad_norm": 0.34702186896685416, "learning_rate": 3.4572247622536416e-07, "loss": 0.1749, "step": 11507 }, { "epoch": 0.9186557036800511, "grad_norm": 0.2761497877864921, "learning_rate": 3.4504884499161117e-07, "loss": 0.123, "step": 11508 }, { "epoch": 0.9187355312524946, "grad_norm": 0.28514558122829514, "learning_rate": 3.443758591531343e-07, "loss": 0.141, "step": 11509 }, { "epoch": 0.9188153588249381, "grad_norm": 0.2955124539443993, "learning_rate": 3.437035187549187e-07, "loss": 0.1658, "step": 11510 }, { "epoch": 0.9188951863973817, "grad_norm": 0.26758431578919717, "learning_rate": 3.430318238419095e-07, "loss": 0.1307, "step": 11511 }, { "epoch": 0.9189750139698252, "grad_norm": 0.3028749900123632, "learning_rate": 3.4236077445900626e-07, "loss": 0.1622, "step": 11512 }, { "epoch": 0.9190548415422687, "grad_norm": 0.28411170712741807, "learning_rate": 3.416903706510666e-07, "loss": 0.1339, "step": 11513 }, { "epoch": 0.9191346691147122, "grad_norm": 0.33102982622395877, "learning_rate": 3.410206124629034e-07, "loss": 0.2246, "step": 11514 }, { "epoch": 0.9192144966871557, "grad_norm": 0.2754819310378443, "learning_rate": 3.403514999392865e-07, "loss": 0.2112, "step": 11515 }, { "epoch": 0.9192943242595992, "grad_norm": 0.2861454291839633, "learning_rate": 3.3968303312494676e-07, "loss": 0.1607, "step": 11516 }, { "epoch": 0.9193741518320427, "grad_norm": 0.2871215815984728, "learning_rate": 3.39015212064564e-07, "loss": 0.1739, "step": 11517 }, { "epoch": 0.9194539794044863, "grad_norm": 0.30683718841435514, "learning_rate": 3.383480368027825e-07, "loss": 0.1646, "step": 11518 }, { "epoch": 0.9195338069769299, "grad_norm": 0.28417375516666915, "learning_rate": 3.376815073841988e-07, "loss": 0.1669, "step": 11519 }, { "epoch": 0.9196136345493734, "grad_norm": 0.28090416804382207, "learning_rate": 3.370156238533706e-07, "loss": 0.1651, "step": 11520 }, { "epoch": 0.9196934621218169, "grad_norm": 0.3108075939381015, "learning_rate": 3.3635038625480455e-07, "loss": 0.1638, "step": 11521 }, { "epoch": 0.9197732896942604, "grad_norm": 0.27558939758221823, "learning_rate": 3.356857946329739e-07, "loss": 0.1186, "step": 11522 }, { "epoch": 0.9198531172667039, "grad_norm": 0.2990602297926821, "learning_rate": 3.3502184903230205e-07, "loss": 0.2063, "step": 11523 }, { "epoch": 0.9199329448391474, "grad_norm": 0.30003153119621023, "learning_rate": 3.343585494971702e-07, "loss": 0.1669, "step": 11524 }, { "epoch": 0.920012772411591, "grad_norm": 0.30045184532987684, "learning_rate": 3.336958960719194e-07, "loss": 0.1661, "step": 11525 }, { "epoch": 0.9200925999840345, "grad_norm": 0.2707161716919704, "learning_rate": 3.330338888008433e-07, "loss": 0.1424, "step": 11526 }, { "epoch": 0.920172427556478, "grad_norm": 0.3078929784049194, "learning_rate": 3.323725277281964e-07, "loss": 0.1698, "step": 11527 }, { "epoch": 0.9202522551289215, "grad_norm": 0.298420297781403, "learning_rate": 3.3171181289818554e-07, "loss": 0.151, "step": 11528 }, { "epoch": 0.920332082701365, "grad_norm": 0.24599425259458357, "learning_rate": 3.31051744354981e-07, "loss": 0.1383, "step": 11529 }, { "epoch": 0.9204119102738085, "grad_norm": 0.2659557300087598, "learning_rate": 3.303923221427019e-07, "loss": 0.1487, "step": 11530 }, { "epoch": 0.920491737846252, "grad_norm": 0.28628964914108807, "learning_rate": 3.297335463054285e-07, "loss": 0.156, "step": 11531 }, { "epoch": 0.9205715654186957, "grad_norm": 0.34760830203992504, "learning_rate": 3.290754168872001e-07, "loss": 0.1502, "step": 11532 }, { "epoch": 0.9206513929911392, "grad_norm": 0.25657196178262837, "learning_rate": 3.2841793393200707e-07, "loss": 0.1671, "step": 11533 }, { "epoch": 0.9207312205635827, "grad_norm": 0.28322948590878805, "learning_rate": 3.277610974838008e-07, "loss": 0.1632, "step": 11534 }, { "epoch": 0.9208110481360262, "grad_norm": 0.29725647350718204, "learning_rate": 3.271049075864874e-07, "loss": 0.1606, "step": 11535 }, { "epoch": 0.9208908757084697, "grad_norm": 0.2897972191469736, "learning_rate": 3.2644936428393173e-07, "loss": 0.1286, "step": 11536 }, { "epoch": 0.9209707032809132, "grad_norm": 0.2940867948478219, "learning_rate": 3.2579446761995317e-07, "loss": 0.1447, "step": 11537 }, { "epoch": 0.9210505308533568, "grad_norm": 0.26364437737385366, "learning_rate": 3.2514021763833113e-07, "loss": 0.1618, "step": 11538 }, { "epoch": 0.9211303584258003, "grad_norm": 0.27692892350837617, "learning_rate": 3.244866143827974e-07, "loss": 0.1451, "step": 11539 }, { "epoch": 0.9212101859982438, "grad_norm": 0.24253569950959183, "learning_rate": 3.2383365789704134e-07, "loss": 0.1679, "step": 11540 }, { "epoch": 0.9212900135706873, "grad_norm": 0.27839658929152455, "learning_rate": 3.231813482247148e-07, "loss": 0.1825, "step": 11541 }, { "epoch": 0.9213698411431308, "grad_norm": 0.28462643628180967, "learning_rate": 3.2252968540941955e-07, "loss": 0.1253, "step": 11542 }, { "epoch": 0.9214496687155743, "grad_norm": 0.2567423375045952, "learning_rate": 3.218786694947162e-07, "loss": 0.1298, "step": 11543 }, { "epoch": 0.9215294962880178, "grad_norm": 0.29279576711363803, "learning_rate": 3.2122830052412456e-07, "loss": 0.13, "step": 11544 }, { "epoch": 0.9216093238604613, "grad_norm": 0.2781127718441782, "learning_rate": 3.205785785411164e-07, "loss": 0.163, "step": 11545 }, { "epoch": 0.921689151432905, "grad_norm": 0.28834725809598977, "learning_rate": 3.199295035891259e-07, "loss": 0.1681, "step": 11546 }, { "epoch": 0.9217689790053485, "grad_norm": 0.2771722311631092, "learning_rate": 3.1928107571154057e-07, "loss": 0.2024, "step": 11547 }, { "epoch": 0.921848806577792, "grad_norm": 0.2808194889222677, "learning_rate": 3.186332949517035e-07, "loss": 0.1481, "step": 11548 }, { "epoch": 0.9219286341502355, "grad_norm": 0.292288888597798, "learning_rate": 3.179861613529167e-07, "loss": 0.1172, "step": 11549 }, { "epoch": 0.922008461722679, "grad_norm": 0.3055869260084753, "learning_rate": 3.173396749584412e-07, "loss": 0.1937, "step": 11550 }, { "epoch": 0.9220882892951225, "grad_norm": 0.2965295390972813, "learning_rate": 3.1669383581148906e-07, "loss": 0.1513, "step": 11551 }, { "epoch": 0.9221681168675661, "grad_norm": 0.25713514456324554, "learning_rate": 3.160486439552335e-07, "loss": 0.1498, "step": 11552 }, { "epoch": 0.9222479444400096, "grad_norm": 0.3544873988262293, "learning_rate": 3.1540409943280226e-07, "loss": 0.1409, "step": 11553 }, { "epoch": 0.9223277720124531, "grad_norm": 0.2783097623200721, "learning_rate": 3.147602022872798e-07, "loss": 0.1592, "step": 11554 }, { "epoch": 0.9224075995848966, "grad_norm": 0.26187284806976396, "learning_rate": 3.141169525617105e-07, "loss": 0.1829, "step": 11555 }, { "epoch": 0.9224874271573401, "grad_norm": 0.276923448008888, "learning_rate": 3.134743502990922e-07, "loss": 0.1796, "step": 11556 }, { "epoch": 0.9225672547297836, "grad_norm": 0.31063549612759855, "learning_rate": 3.1283239554237953e-07, "loss": 0.1818, "step": 11557 }, { "epoch": 0.9226470823022271, "grad_norm": 0.2771255249201293, "learning_rate": 3.1219108833448364e-07, "loss": 0.1458, "step": 11558 }, { "epoch": 0.9227269098746708, "grad_norm": 0.2764978060427533, "learning_rate": 3.115504287182758e-07, "loss": 0.1521, "step": 11559 }, { "epoch": 0.9228067374471143, "grad_norm": 0.2934084669480994, "learning_rate": 3.1091041673658175e-07, "loss": 0.1802, "step": 11560 }, { "epoch": 0.9228865650195578, "grad_norm": 0.3003862545461398, "learning_rate": 3.102710524321806e-07, "loss": 0.2073, "step": 11561 }, { "epoch": 0.9229663925920013, "grad_norm": 0.3532388636938103, "learning_rate": 3.096323358478148e-07, "loss": 0.144, "step": 11562 }, { "epoch": 0.9230462201644448, "grad_norm": 0.3013083320458835, "learning_rate": 3.0899426702617695e-07, "loss": 0.1521, "step": 11563 }, { "epoch": 0.9231260477368883, "grad_norm": 0.26068802928355655, "learning_rate": 3.0835684600992287e-07, "loss": 0.1534, "step": 11564 }, { "epoch": 0.9232058753093318, "grad_norm": 0.317259145379761, "learning_rate": 3.077200728416574e-07, "loss": 0.193, "step": 11565 }, { "epoch": 0.9232857028817754, "grad_norm": 0.274742455795804, "learning_rate": 3.0708394756394976e-07, "loss": 0.1365, "step": 11566 }, { "epoch": 0.9233655304542189, "grad_norm": 0.2679487500863279, "learning_rate": 3.0644847021932156e-07, "loss": 0.149, "step": 11567 }, { "epoch": 0.9234453580266624, "grad_norm": 0.2887716257642722, "learning_rate": 3.058136408502499e-07, "loss": 0.1461, "step": 11568 }, { "epoch": 0.9235251855991059, "grad_norm": 0.2739585863959681, "learning_rate": 3.051794594991753e-07, "loss": 0.1503, "step": 11569 }, { "epoch": 0.9236050131715494, "grad_norm": 0.2690618883430861, "learning_rate": 3.045459262084849e-07, "loss": 0.1308, "step": 11570 }, { "epoch": 0.9236848407439929, "grad_norm": 0.2834023092558183, "learning_rate": 3.0391304102053154e-07, "loss": 0.1613, "step": 11571 }, { "epoch": 0.9237646683164364, "grad_norm": 0.3235904215056009, "learning_rate": 3.0328080397761917e-07, "loss": 0.1399, "step": 11572 }, { "epoch": 0.9238444958888801, "grad_norm": 0.298104188552135, "learning_rate": 3.0264921512201286e-07, "loss": 0.1651, "step": 11573 }, { "epoch": 0.9239243234613236, "grad_norm": 0.3132806151867608, "learning_rate": 3.020182744959288e-07, "loss": 0.1545, "step": 11574 }, { "epoch": 0.9240041510337671, "grad_norm": 0.2611343726568643, "learning_rate": 3.013879821415433e-07, "loss": 0.1908, "step": 11575 }, { "epoch": 0.9240839786062106, "grad_norm": 0.2506897526989364, "learning_rate": 3.007583381009904e-07, "loss": 0.156, "step": 11576 }, { "epoch": 0.9241638061786541, "grad_norm": 0.27471987383330104, "learning_rate": 3.001293424163587e-07, "loss": 0.1525, "step": 11577 }, { "epoch": 0.9242436337510976, "grad_norm": 0.2656733033592708, "learning_rate": 2.9950099512969346e-07, "loss": 0.1648, "step": 11578 }, { "epoch": 0.9243234613235412, "grad_norm": 0.2757882092625423, "learning_rate": 2.9887329628299765e-07, "loss": 0.19, "step": 11579 }, { "epoch": 0.9244032888959847, "grad_norm": 0.2746750453103568, "learning_rate": 2.9824624591823115e-07, "loss": 0.1518, "step": 11580 }, { "epoch": 0.9244831164684282, "grad_norm": 0.25333068991091134, "learning_rate": 2.976198440773093e-07, "loss": 0.1863, "step": 11581 }, { "epoch": 0.9245629440408717, "grad_norm": 0.2789649305769938, "learning_rate": 2.9699409080210407e-07, "loss": 0.1614, "step": 11582 }, { "epoch": 0.9246427716133152, "grad_norm": 0.2720356981736501, "learning_rate": 2.963689861344443e-07, "loss": 0.1351, "step": 11583 }, { "epoch": 0.9247225991857587, "grad_norm": 0.298164525320255, "learning_rate": 2.9574453011611657e-07, "loss": 0.1533, "step": 11584 }, { "epoch": 0.9248024267582022, "grad_norm": 0.2649660869215561, "learning_rate": 2.9512072278886416e-07, "loss": 0.1549, "step": 11585 }, { "epoch": 0.9248822543306459, "grad_norm": 0.2892384943059814, "learning_rate": 2.944975641943848e-07, "loss": 0.1456, "step": 11586 }, { "epoch": 0.9249620819030894, "grad_norm": 0.3248372493036553, "learning_rate": 2.938750543743352e-07, "loss": 0.1318, "step": 11587 }, { "epoch": 0.9250419094755329, "grad_norm": 0.26570561155453404, "learning_rate": 2.9325319337032534e-07, "loss": 0.18, "step": 11588 }, { "epoch": 0.9251217370479764, "grad_norm": 0.3020583949787851, "learning_rate": 2.9263198122392643e-07, "loss": 0.1585, "step": 11589 }, { "epoch": 0.9252015646204199, "grad_norm": 0.3069192977252095, "learning_rate": 2.9201141797666423e-07, "loss": 0.1672, "step": 11590 }, { "epoch": 0.9252813921928634, "grad_norm": 0.2461373278662016, "learning_rate": 2.913915036700188e-07, "loss": 0.1593, "step": 11591 }, { "epoch": 0.9253612197653069, "grad_norm": 0.24329090614851676, "learning_rate": 2.9077223834543144e-07, "loss": 0.1518, "step": 11592 }, { "epoch": 0.9254410473377505, "grad_norm": 0.2507034420715888, "learning_rate": 2.9015362204429574e-07, "loss": 0.155, "step": 11593 }, { "epoch": 0.925520874910194, "grad_norm": 0.3244792730486492, "learning_rate": 2.895356548079642e-07, "loss": 0.134, "step": 11594 }, { "epoch": 0.9256007024826375, "grad_norm": 0.2713099380944664, "learning_rate": 2.8891833667774703e-07, "loss": 0.1392, "step": 11595 }, { "epoch": 0.925680530055081, "grad_norm": 0.28917691567946324, "learning_rate": 2.8830166769490797e-07, "loss": 0.1571, "step": 11596 }, { "epoch": 0.9257603576275245, "grad_norm": 0.29915897829903626, "learning_rate": 2.8768564790066844e-07, "loss": 0.1472, "step": 11597 }, { "epoch": 0.925840185199968, "grad_norm": 0.29176823973790195, "learning_rate": 2.8707027733620663e-07, "loss": 0.1793, "step": 11598 }, { "epoch": 0.9259200127724115, "grad_norm": 0.2667942559799191, "learning_rate": 2.864555560426596e-07, "loss": 0.1332, "step": 11599 }, { "epoch": 0.9259998403448552, "grad_norm": 0.29399278437360327, "learning_rate": 2.85841484061119e-07, "loss": 0.1558, "step": 11600 }, { "epoch": 0.9260796679172987, "grad_norm": 0.2731386484137842, "learning_rate": 2.8522806143263195e-07, "loss": 0.1733, "step": 11601 }, { "epoch": 0.9261594954897422, "grad_norm": 0.31315241994241716, "learning_rate": 2.846152881982012e-07, "loss": 0.1571, "step": 11602 }, { "epoch": 0.9262393230621857, "grad_norm": 0.29305847426779186, "learning_rate": 2.8400316439879284e-07, "loss": 0.1586, "step": 11603 }, { "epoch": 0.9263191506346292, "grad_norm": 0.2711533471980591, "learning_rate": 2.833916900753231e-07, "loss": 0.1523, "step": 11604 }, { "epoch": 0.9263989782070727, "grad_norm": 0.3264570911938464, "learning_rate": 2.827808652686648e-07, "loss": 0.1582, "step": 11605 }, { "epoch": 0.9264788057795162, "grad_norm": 0.3052216607836636, "learning_rate": 2.821706900196508e-07, "loss": 0.1583, "step": 11606 }, { "epoch": 0.9265586333519598, "grad_norm": 0.28578681952737756, "learning_rate": 2.8156116436906854e-07, "loss": 0.1461, "step": 11607 }, { "epoch": 0.9266384609244033, "grad_norm": 0.2747166300542921, "learning_rate": 2.809522883576632e-07, "loss": 0.1648, "step": 11608 }, { "epoch": 0.9267182884968468, "grad_norm": 0.28688989985496877, "learning_rate": 2.803440620261333e-07, "loss": 0.1876, "step": 11609 }, { "epoch": 0.9267981160692903, "grad_norm": 0.33174166476610256, "learning_rate": 2.7973648541513963e-07, "loss": 0.157, "step": 11610 }, { "epoch": 0.9268779436417338, "grad_norm": 0.24838024622830962, "learning_rate": 2.7912955856529425e-07, "loss": 0.1674, "step": 11611 }, { "epoch": 0.9269577712141773, "grad_norm": 0.2675660684853808, "learning_rate": 2.78523281517169e-07, "loss": 0.1812, "step": 11612 }, { "epoch": 0.927037598786621, "grad_norm": 0.2917530533080836, "learning_rate": 2.779176543112905e-07, "loss": 0.154, "step": 11613 }, { "epoch": 0.9271174263590645, "grad_norm": 0.26915388408582713, "learning_rate": 2.773126769881407e-07, "loss": 0.1434, "step": 11614 }, { "epoch": 0.927197253931508, "grad_norm": 0.31242285178558016, "learning_rate": 2.7670834958816283e-07, "loss": 0.2088, "step": 11615 }, { "epoch": 0.9272770815039515, "grad_norm": 0.3105440075573235, "learning_rate": 2.761046721517513e-07, "loss": 0.12, "step": 11616 }, { "epoch": 0.927356909076395, "grad_norm": 0.2686415905415936, "learning_rate": 2.7550164471926267e-07, "loss": 0.1591, "step": 11617 }, { "epoch": 0.9274367366488385, "grad_norm": 0.3221897312091306, "learning_rate": 2.748992673310036e-07, "loss": 0.1891, "step": 11618 }, { "epoch": 0.927516564221282, "grad_norm": 0.27873438584484755, "learning_rate": 2.7429754002724406e-07, "loss": 0.2164, "step": 11619 }, { "epoch": 0.9275963917937255, "grad_norm": 0.2793054848283847, "learning_rate": 2.736964628482042e-07, "loss": 0.176, "step": 11620 }, { "epoch": 0.9276762193661691, "grad_norm": 0.2641134958172006, "learning_rate": 2.730960358340651e-07, "loss": 0.1575, "step": 11621 }, { "epoch": 0.9277560469386126, "grad_norm": 0.24587802309134543, "learning_rate": 2.7249625902496245e-07, "loss": 0.1884, "step": 11622 }, { "epoch": 0.9278358745110561, "grad_norm": 0.3024594394687761, "learning_rate": 2.718971324609876e-07, "loss": 0.1671, "step": 11623 }, { "epoch": 0.9279157020834996, "grad_norm": 0.23777085223624517, "learning_rate": 2.7129865618219287e-07, "loss": 0.1121, "step": 11624 }, { "epoch": 0.9279955296559431, "grad_norm": 0.2591798785265726, "learning_rate": 2.7070083022858185e-07, "loss": 0.1691, "step": 11625 }, { "epoch": 0.9280753572283866, "grad_norm": 0.26838637457803116, "learning_rate": 2.7010365464011813e-07, "loss": 0.1537, "step": 11626 }, { "epoch": 0.9281551848008303, "grad_norm": 0.2755867186785866, "learning_rate": 2.6950712945671863e-07, "loss": 0.1738, "step": 11627 }, { "epoch": 0.9282350123732738, "grad_norm": 0.3053677591326956, "learning_rate": 2.689112547182593e-07, "loss": 0.133, "step": 11628 }, { "epoch": 0.9283148399457173, "grad_norm": 0.28767856306986234, "learning_rate": 2.6831603046457376e-07, "loss": 0.1583, "step": 11629 }, { "epoch": 0.9283946675181608, "grad_norm": 0.2642535578341135, "learning_rate": 2.6772145673545023e-07, "loss": 0.1282, "step": 11630 }, { "epoch": 0.9284744950906043, "grad_norm": 0.2876056567232695, "learning_rate": 2.6712753357063136e-07, "loss": 0.1563, "step": 11631 }, { "epoch": 0.9285543226630478, "grad_norm": 0.26134866251060934, "learning_rate": 2.6653426100981874e-07, "loss": 0.2179, "step": 11632 }, { "epoch": 0.9286341502354913, "grad_norm": 0.3102019534193862, "learning_rate": 2.659416390926728e-07, "loss": 0.1667, "step": 11633 }, { "epoch": 0.9287139778079349, "grad_norm": 0.2774204591073321, "learning_rate": 2.653496678588063e-07, "loss": 0.19, "step": 11634 }, { "epoch": 0.9287938053803784, "grad_norm": 0.28191113834241505, "learning_rate": 2.647583473477899e-07, "loss": 0.2141, "step": 11635 }, { "epoch": 0.9288736329528219, "grad_norm": 0.28164605266496406, "learning_rate": 2.6416767759915194e-07, "loss": 0.1535, "step": 11636 }, { "epoch": 0.9289534605252654, "grad_norm": 0.2631967386306052, "learning_rate": 2.6357765865237526e-07, "loss": 0.1314, "step": 11637 }, { "epoch": 0.9290332880977089, "grad_norm": 0.2588600278735295, "learning_rate": 2.629882905469017e-07, "loss": 0.1673, "step": 11638 }, { "epoch": 0.9291131156701524, "grad_norm": 0.27552674359714174, "learning_rate": 2.623995733221274e-07, "loss": 0.1522, "step": 11639 }, { "epoch": 0.929192943242596, "grad_norm": 0.3084578513882427, "learning_rate": 2.6181150701740653e-07, "loss": 0.1573, "step": 11640 }, { "epoch": 0.9292727708150396, "grad_norm": 0.27506148488733684, "learning_rate": 2.6122409167204766e-07, "loss": 0.1332, "step": 11641 }, { "epoch": 0.9293525983874831, "grad_norm": 0.2858323197682248, "learning_rate": 2.606373273253171e-07, "loss": 0.1643, "step": 11642 }, { "epoch": 0.9294324259599266, "grad_norm": 0.2653999074669343, "learning_rate": 2.6005121401643907e-07, "loss": 0.2069, "step": 11643 }, { "epoch": 0.9295122535323701, "grad_norm": 0.25232051524102467, "learning_rate": 2.594657517845922e-07, "loss": 0.1319, "step": 11644 }, { "epoch": 0.9295920811048136, "grad_norm": 0.28836967434181765, "learning_rate": 2.588809406689119e-07, "loss": 0.1665, "step": 11645 }, { "epoch": 0.9296719086772571, "grad_norm": 0.25760710849127644, "learning_rate": 2.582967807084913e-07, "loss": 0.1237, "step": 11646 }, { "epoch": 0.9297517362497006, "grad_norm": 0.267674480333901, "learning_rate": 2.5771327194237916e-07, "loss": 0.1586, "step": 11647 }, { "epoch": 0.9298315638221442, "grad_norm": 0.2688673791226135, "learning_rate": 2.571304144095821e-07, "loss": 0.1321, "step": 11648 }, { "epoch": 0.9299113913945877, "grad_norm": 0.2708528358667161, "learning_rate": 2.565482081490567e-07, "loss": 0.1823, "step": 11649 }, { "epoch": 0.9299912189670312, "grad_norm": 0.31343955337091656, "learning_rate": 2.5596665319972737e-07, "loss": 0.1751, "step": 11650 }, { "epoch": 0.9300710465394747, "grad_norm": 0.27969324923158007, "learning_rate": 2.5538574960046414e-07, "loss": 0.138, "step": 11651 }, { "epoch": 0.9301508741119182, "grad_norm": 0.2616850533088984, "learning_rate": 2.548054973901015e-07, "loss": 0.197, "step": 11652 }, { "epoch": 0.9302307016843617, "grad_norm": 0.35194300014697183, "learning_rate": 2.54225896607424e-07, "loss": 0.1677, "step": 11653 }, { "epoch": 0.9303105292568054, "grad_norm": 0.2645137016057765, "learning_rate": 2.5364694729117847e-07, "loss": 0.1495, "step": 11654 }, { "epoch": 0.9303903568292489, "grad_norm": 0.31654080660662215, "learning_rate": 2.530686494800627e-07, "loss": 0.1285, "step": 11655 }, { "epoch": 0.9304701844016924, "grad_norm": 0.2991334989073161, "learning_rate": 2.5249100321273703e-07, "loss": 0.1895, "step": 11656 }, { "epoch": 0.9305500119741359, "grad_norm": 0.28093552841246056, "learning_rate": 2.5191400852781266e-07, "loss": 0.1326, "step": 11657 }, { "epoch": 0.9306298395465794, "grad_norm": 0.30852808601413645, "learning_rate": 2.513376654638577e-07, "loss": 0.1723, "step": 11658 }, { "epoch": 0.9307096671190229, "grad_norm": 0.28381526118047085, "learning_rate": 2.507619740594014e-07, "loss": 0.1447, "step": 11659 }, { "epoch": 0.9307894946914664, "grad_norm": 0.3002991255491876, "learning_rate": 2.5018693435292394e-07, "loss": 0.145, "step": 11660 }, { "epoch": 0.93086932226391, "grad_norm": 0.3015318900806745, "learning_rate": 2.4961254638286804e-07, "loss": 0.1851, "step": 11661 }, { "epoch": 0.9309491498363535, "grad_norm": 0.28265700349619677, "learning_rate": 2.490388101876251e-07, "loss": 0.164, "step": 11662 }, { "epoch": 0.931028977408797, "grad_norm": 0.25463237865616095, "learning_rate": 2.4846572580555004e-07, "loss": 0.1758, "step": 11663 }, { "epoch": 0.9311088049812405, "grad_norm": 0.2692163249519013, "learning_rate": 2.4789329327495003e-07, "loss": 0.1179, "step": 11664 }, { "epoch": 0.931188632553684, "grad_norm": 0.29949046883117464, "learning_rate": 2.4732151263409e-07, "loss": 0.1526, "step": 11665 }, { "epoch": 0.9312684601261275, "grad_norm": 0.3191334790194632, "learning_rate": 2.4675038392119155e-07, "loss": 0.1699, "step": 11666 }, { "epoch": 0.931348287698571, "grad_norm": 0.2804017425443797, "learning_rate": 2.4617990717443196e-07, "loss": 0.1744, "step": 11667 }, { "epoch": 0.9314281152710147, "grad_norm": 0.27003391510138924, "learning_rate": 2.4561008243194627e-07, "loss": 0.1548, "step": 11668 }, { "epoch": 0.9315079428434582, "grad_norm": 0.2345505668353308, "learning_rate": 2.45040909731824e-07, "loss": 0.1026, "step": 11669 }, { "epoch": 0.9315877704159017, "grad_norm": 0.3139396153367699, "learning_rate": 2.444723891121115e-07, "loss": 0.132, "step": 11670 }, { "epoch": 0.9316675979883452, "grad_norm": 0.3039942627928195, "learning_rate": 2.439045206108148e-07, "loss": 0.1772, "step": 11671 }, { "epoch": 0.9317474255607887, "grad_norm": 0.28869304705154614, "learning_rate": 2.4333730426588933e-07, "loss": 0.158, "step": 11672 }, { "epoch": 0.9318272531332322, "grad_norm": 0.2503796527067671, "learning_rate": 2.427707401152557e-07, "loss": 0.1618, "step": 11673 }, { "epoch": 0.9319070807056757, "grad_norm": 0.29072786344376617, "learning_rate": 2.422048281967848e-07, "loss": 0.1551, "step": 11674 }, { "epoch": 0.9319869082781193, "grad_norm": 0.29907834427629565, "learning_rate": 2.416395685483053e-07, "loss": 0.1619, "step": 11675 }, { "epoch": 0.9320667358505628, "grad_norm": 0.2644416303643685, "learning_rate": 2.4107496120760134e-07, "loss": 0.1372, "step": 11676 }, { "epoch": 0.9321465634230063, "grad_norm": 0.2477485237910791, "learning_rate": 2.405110062124172e-07, "loss": 0.1522, "step": 11677 }, { "epoch": 0.9322263909954498, "grad_norm": 0.27012897079029946, "learning_rate": 2.399477036004505e-07, "loss": 0.1846, "step": 11678 }, { "epoch": 0.9323062185678933, "grad_norm": 0.30568203625147167, "learning_rate": 2.393850534093545e-07, "loss": 0.1699, "step": 11679 }, { "epoch": 0.9323860461403368, "grad_norm": 0.28005443974897726, "learning_rate": 2.3882305567674013e-07, "loss": 0.1727, "step": 11680 }, { "epoch": 0.9324658737127804, "grad_norm": 0.2815101644956429, "learning_rate": 2.382617104401752e-07, "loss": 0.1425, "step": 11681 }, { "epoch": 0.932545701285224, "grad_norm": 0.3076406061596535, "learning_rate": 2.3770101773718524e-07, "loss": 0.1382, "step": 11682 }, { "epoch": 0.9326255288576675, "grad_norm": 0.24884122046463503, "learning_rate": 2.3714097760524802e-07, "loss": 0.146, "step": 11683 }, { "epoch": 0.932705356430111, "grad_norm": 0.27063425911216515, "learning_rate": 2.3658159008180026e-07, "loss": 0.1417, "step": 11684 }, { "epoch": 0.9327851840025545, "grad_norm": 0.29534923583366357, "learning_rate": 2.360228552042354e-07, "loss": 0.1527, "step": 11685 }, { "epoch": 0.932865011574998, "grad_norm": 0.3019569087382149, "learning_rate": 2.3546477300990245e-07, "loss": 0.16, "step": 11686 }, { "epoch": 0.9329448391474415, "grad_norm": 0.26521674898448633, "learning_rate": 2.349073435361071e-07, "loss": 0.1608, "step": 11687 }, { "epoch": 0.933024666719885, "grad_norm": 0.2403714334802651, "learning_rate": 2.3435056682011182e-07, "loss": 0.1612, "step": 11688 }, { "epoch": 0.9331044942923286, "grad_norm": 0.3145214314234303, "learning_rate": 2.3379444289913344e-07, "loss": 0.1565, "step": 11689 }, { "epoch": 0.9331843218647721, "grad_norm": 0.27059881191807794, "learning_rate": 2.3323897181034783e-07, "loss": 0.1446, "step": 11690 }, { "epoch": 0.9332641494372156, "grad_norm": 0.24967259019619867, "learning_rate": 2.3268415359088525e-07, "loss": 0.1617, "step": 11691 }, { "epoch": 0.9333439770096591, "grad_norm": 0.30934431751144126, "learning_rate": 2.3212998827783494e-07, "loss": 0.1666, "step": 11692 }, { "epoch": 0.9334238045821026, "grad_norm": 0.2371124447417469, "learning_rate": 2.315764759082384e-07, "loss": 0.1412, "step": 11693 }, { "epoch": 0.9335036321545461, "grad_norm": 0.2773652611939739, "learning_rate": 2.310236165190971e-07, "loss": 0.167, "step": 11694 }, { "epoch": 0.9335834597269898, "grad_norm": 0.2861157580856069, "learning_rate": 2.3047141014736595e-07, "loss": 0.1753, "step": 11695 }, { "epoch": 0.9336632872994333, "grad_norm": 0.27631483921204947, "learning_rate": 2.29919856829961e-07, "loss": 0.2256, "step": 11696 }, { "epoch": 0.9337431148718768, "grad_norm": 0.2605824583317585, "learning_rate": 2.2936895660374713e-07, "loss": 0.1874, "step": 11697 }, { "epoch": 0.9338229424443203, "grad_norm": 0.27082951825802704, "learning_rate": 2.2881870950555385e-07, "loss": 0.1188, "step": 11698 }, { "epoch": 0.9339027700167638, "grad_norm": 0.29635175999324953, "learning_rate": 2.2826911557215946e-07, "loss": 0.1892, "step": 11699 }, { "epoch": 0.9339825975892073, "grad_norm": 0.2763366445906843, "learning_rate": 2.2772017484030573e-07, "loss": 0.1817, "step": 11700 }, { "epoch": 0.9340624251616508, "grad_norm": 0.2574634575841091, "learning_rate": 2.271718873466844e-07, "loss": 0.1919, "step": 11701 }, { "epoch": 0.9341422527340943, "grad_norm": 0.26590260837483326, "learning_rate": 2.2662425312794612e-07, "loss": 0.1371, "step": 11702 }, { "epoch": 0.9342220803065379, "grad_norm": 0.2887279426953473, "learning_rate": 2.260772722206994e-07, "loss": 0.1478, "step": 11703 }, { "epoch": 0.9343019078789814, "grad_norm": 0.28652075438246305, "learning_rate": 2.2553094466150837e-07, "loss": 0.0975, "step": 11704 }, { "epoch": 0.9343817354514249, "grad_norm": 0.26435820286018824, "learning_rate": 2.2498527048689156e-07, "loss": 0.1567, "step": 11705 }, { "epoch": 0.9344615630238684, "grad_norm": 0.2887640719015719, "learning_rate": 2.2444024973332533e-07, "loss": 0.141, "step": 11706 }, { "epoch": 0.9345413905963119, "grad_norm": 0.2748155387189502, "learning_rate": 2.238958824372428e-07, "loss": 0.1246, "step": 11707 }, { "epoch": 0.9346212181687555, "grad_norm": 0.29631087095590447, "learning_rate": 2.2335216863503262e-07, "loss": 0.1329, "step": 11708 }, { "epoch": 0.934701045741199, "grad_norm": 0.26659066685528626, "learning_rate": 2.2280910836303905e-07, "loss": 0.146, "step": 11709 }, { "epoch": 0.9347808733136426, "grad_norm": 0.27398091829317583, "learning_rate": 2.2226670165756524e-07, "loss": 0.216, "step": 11710 }, { "epoch": 0.9348607008860861, "grad_norm": 0.2569239234463626, "learning_rate": 2.2172494855486559e-07, "loss": 0.1614, "step": 11711 }, { "epoch": 0.9349405284585296, "grad_norm": 0.2768829868738466, "learning_rate": 2.2118384909115775e-07, "loss": 0.1441, "step": 11712 }, { "epoch": 0.9350203560309731, "grad_norm": 0.2724295821496062, "learning_rate": 2.2064340330261057e-07, "loss": 0.1668, "step": 11713 }, { "epoch": 0.9351001836034166, "grad_norm": 0.2946394917665034, "learning_rate": 2.2010361122535183e-07, "loss": 0.1413, "step": 11714 }, { "epoch": 0.9351800111758601, "grad_norm": 0.28430199629250785, "learning_rate": 2.1956447289546268e-07, "loss": 0.179, "step": 11715 }, { "epoch": 0.9352598387483037, "grad_norm": 0.26994165384840224, "learning_rate": 2.1902598834898202e-07, "loss": 0.137, "step": 11716 }, { "epoch": 0.9353396663207472, "grad_norm": 0.31102247628992, "learning_rate": 2.184881576219078e-07, "loss": 0.1471, "step": 11717 }, { "epoch": 0.9354194938931907, "grad_norm": 0.27941970781477493, "learning_rate": 2.1795098075019117e-07, "loss": 0.1293, "step": 11718 }, { "epoch": 0.9354993214656342, "grad_norm": 0.28545861289686, "learning_rate": 2.1741445776973903e-07, "loss": 0.1863, "step": 11719 }, { "epoch": 0.9355791490380777, "grad_norm": 0.2549487363096304, "learning_rate": 2.1687858871641597e-07, "loss": 0.1152, "step": 11720 }, { "epoch": 0.9356589766105212, "grad_norm": 0.31106093528518786, "learning_rate": 2.163433736260445e-07, "loss": 0.1938, "step": 11721 }, { "epoch": 0.9357388041829648, "grad_norm": 0.31243350378417495, "learning_rate": 2.1580881253440045e-07, "loss": 0.1295, "step": 11722 }, { "epoch": 0.9358186317554084, "grad_norm": 0.30787580863442066, "learning_rate": 2.152749054772163e-07, "loss": 0.1673, "step": 11723 }, { "epoch": 0.9358984593278519, "grad_norm": 0.27393535903960936, "learning_rate": 2.1474165249018354e-07, "loss": 0.1527, "step": 11724 }, { "epoch": 0.9359782869002954, "grad_norm": 0.2742649955762329, "learning_rate": 2.1420905360894473e-07, "loss": 0.1656, "step": 11725 }, { "epoch": 0.9360581144727389, "grad_norm": 0.3249712002180745, "learning_rate": 2.1367710886910587e-07, "loss": 0.1434, "step": 11726 }, { "epoch": 0.9361379420451824, "grad_norm": 0.27329032291921557, "learning_rate": 2.1314581830622406e-07, "loss": 0.1453, "step": 11727 }, { "epoch": 0.9362177696176259, "grad_norm": 0.28970761035041087, "learning_rate": 2.126151819558131e-07, "loss": 0.1498, "step": 11728 }, { "epoch": 0.9362975971900694, "grad_norm": 0.34830744175256056, "learning_rate": 2.120851998533424e-07, "loss": 0.1805, "step": 11729 }, { "epoch": 0.936377424762513, "grad_norm": 0.26303814072551596, "learning_rate": 2.1155587203424367e-07, "loss": 0.1485, "step": 11730 }, { "epoch": 0.9364572523349565, "grad_norm": 0.2282980011233394, "learning_rate": 2.1102719853389853e-07, "loss": 0.1387, "step": 11731 }, { "epoch": 0.9365370799074, "grad_norm": 0.25107575940816496, "learning_rate": 2.104991793876443e-07, "loss": 0.1705, "step": 11732 }, { "epoch": 0.9366169074798435, "grad_norm": 0.2637210395880627, "learning_rate": 2.0997181463077944e-07, "loss": 0.1828, "step": 11733 }, { "epoch": 0.936696735052287, "grad_norm": 0.3024309092975203, "learning_rate": 2.0944510429855348e-07, "loss": 0.2161, "step": 11734 }, { "epoch": 0.9367765626247306, "grad_norm": 0.27914913078561937, "learning_rate": 2.089190484261794e-07, "loss": 0.1638, "step": 11735 }, { "epoch": 0.9368563901971741, "grad_norm": 0.2667805004403897, "learning_rate": 2.0839364704881903e-07, "loss": 0.119, "step": 11736 }, { "epoch": 0.9369362177696177, "grad_norm": 0.30004519839417687, "learning_rate": 2.0786890020159323e-07, "loss": 0.1387, "step": 11737 }, { "epoch": 0.9370160453420612, "grad_norm": 0.26395128037419247, "learning_rate": 2.0734480791958055e-07, "loss": 0.1727, "step": 11738 }, { "epoch": 0.9370958729145047, "grad_norm": 0.2726047654843773, "learning_rate": 2.06821370237813e-07, "loss": 0.1621, "step": 11739 }, { "epoch": 0.9371757004869482, "grad_norm": 0.26308941097577393, "learning_rate": 2.062985871912826e-07, "loss": 0.1505, "step": 11740 }, { "epoch": 0.9372555280593917, "grad_norm": 0.2864796316976919, "learning_rate": 2.0577645881493246e-07, "loss": 0.135, "step": 11741 }, { "epoch": 0.9373353556318352, "grad_norm": 0.2909398045028868, "learning_rate": 2.052549851436658e-07, "loss": 0.1139, "step": 11742 }, { "epoch": 0.9374151832042787, "grad_norm": 0.26233841516460943, "learning_rate": 2.0473416621234142e-07, "loss": 0.1633, "step": 11743 }, { "epoch": 0.9374950107767223, "grad_norm": 0.2954958627098006, "learning_rate": 2.0421400205577591e-07, "loss": 0.1527, "step": 11744 }, { "epoch": 0.9375748383491658, "grad_norm": 0.3145779779060719, "learning_rate": 2.036944927087381e-07, "loss": 0.189, "step": 11745 }, { "epoch": 0.9376546659216093, "grad_norm": 0.2951387877771411, "learning_rate": 2.0317563820595355e-07, "loss": 0.1589, "step": 11746 }, { "epoch": 0.9377344934940528, "grad_norm": 0.25372481967962357, "learning_rate": 2.0265743858210784e-07, "loss": 0.1503, "step": 11747 }, { "epoch": 0.9378143210664963, "grad_norm": 0.2741581784621961, "learning_rate": 2.0213989387184108e-07, "loss": 0.1631, "step": 11748 }, { "epoch": 0.9378941486389399, "grad_norm": 0.3053860642668491, "learning_rate": 2.0162300410974778e-07, "loss": 0.1797, "step": 11749 }, { "epoch": 0.9379739762113835, "grad_norm": 0.2832888399214491, "learning_rate": 2.011067693303792e-07, "loss": 0.1496, "step": 11750 }, { "epoch": 0.938053803783827, "grad_norm": 0.2576446228913182, "learning_rate": 2.0059118956824663e-07, "loss": 0.1631, "step": 11751 }, { "epoch": 0.9381336313562705, "grad_norm": 0.275680025926746, "learning_rate": 2.0007626485781028e-07, "loss": 0.1909, "step": 11752 }, { "epoch": 0.938213458928714, "grad_norm": 0.29245470451785943, "learning_rate": 1.9956199523349594e-07, "loss": 0.1579, "step": 11753 }, { "epoch": 0.9382932865011575, "grad_norm": 0.26818993806114405, "learning_rate": 1.9904838072967613e-07, "loss": 0.1749, "step": 11754 }, { "epoch": 0.938373114073601, "grad_norm": 0.28694460173009817, "learning_rate": 1.985354213806856e-07, "loss": 0.2017, "step": 11755 }, { "epoch": 0.9384529416460445, "grad_norm": 0.30630872747083715, "learning_rate": 1.9802311722081357e-07, "loss": 0.1376, "step": 11756 }, { "epoch": 0.938532769218488, "grad_norm": 0.25717297217422097, "learning_rate": 1.97511468284306e-07, "loss": 0.1887, "step": 11757 }, { "epoch": 0.9386125967909316, "grad_norm": 0.279449078031711, "learning_rate": 1.9700047460536442e-07, "loss": 0.1594, "step": 11758 }, { "epoch": 0.9386924243633751, "grad_norm": 0.2605943131615361, "learning_rate": 1.9649013621814595e-07, "loss": 0.194, "step": 11759 }, { "epoch": 0.9387722519358186, "grad_norm": 0.2651784196399962, "learning_rate": 1.959804531567655e-07, "loss": 0.182, "step": 11760 }, { "epoch": 0.9388520795082621, "grad_norm": 0.2557516685875487, "learning_rate": 1.9547142545529363e-07, "loss": 0.1409, "step": 11761 }, { "epoch": 0.9389319070807057, "grad_norm": 0.2825781295422345, "learning_rate": 1.9496305314775532e-07, "loss": 0.1742, "step": 11762 }, { "epoch": 0.9390117346531492, "grad_norm": 0.2828565238133932, "learning_rate": 1.944553362681345e-07, "loss": 0.1546, "step": 11763 }, { "epoch": 0.9390915622255928, "grad_norm": 0.30029485575540305, "learning_rate": 1.939482748503696e-07, "loss": 0.1683, "step": 11764 }, { "epoch": 0.9391713897980363, "grad_norm": 0.2809697212077567, "learning_rate": 1.934418689283568e-07, "loss": 0.1425, "step": 11765 }, { "epoch": 0.9392512173704798, "grad_norm": 0.3186029060821427, "learning_rate": 1.9293611853594573e-07, "loss": 0.1642, "step": 11766 }, { "epoch": 0.9393310449429233, "grad_norm": 0.2768250456294959, "learning_rate": 1.9243102370694488e-07, "loss": 0.1654, "step": 11767 }, { "epoch": 0.9394108725153668, "grad_norm": 0.2810318614302811, "learning_rate": 1.9192658447511724e-07, "loss": 0.1654, "step": 11768 }, { "epoch": 0.9394907000878103, "grad_norm": 0.27423540811759833, "learning_rate": 1.914228008741814e-07, "loss": 0.1558, "step": 11769 }, { "epoch": 0.9395705276602538, "grad_norm": 0.3101214191292896, "learning_rate": 1.90919672937816e-07, "loss": 0.1058, "step": 11770 }, { "epoch": 0.9396503552326974, "grad_norm": 0.3154066031368379, "learning_rate": 1.9041720069965185e-07, "loss": 0.1729, "step": 11771 }, { "epoch": 0.9397301828051409, "grad_norm": 0.28808352684226474, "learning_rate": 1.899153841932766e-07, "loss": 0.1614, "step": 11772 }, { "epoch": 0.9398100103775844, "grad_norm": 0.2686664511977149, "learning_rate": 1.8941422345223448e-07, "loss": 0.1438, "step": 11773 }, { "epoch": 0.9398898379500279, "grad_norm": 0.2726065985064597, "learning_rate": 1.8891371851002872e-07, "loss": 0.165, "step": 11774 }, { "epoch": 0.9399696655224714, "grad_norm": 0.2592768085603419, "learning_rate": 1.8841386940011474e-07, "loss": 0.1474, "step": 11775 }, { "epoch": 0.940049493094915, "grad_norm": 0.2635981002090551, "learning_rate": 1.8791467615590254e-07, "loss": 0.1353, "step": 11776 }, { "epoch": 0.9401293206673585, "grad_norm": 0.30587293213298195, "learning_rate": 1.8741613881076537e-07, "loss": 0.1594, "step": 11777 }, { "epoch": 0.9402091482398021, "grad_norm": 0.2574978497545391, "learning_rate": 1.869182573980255e-07, "loss": 0.1523, "step": 11778 }, { "epoch": 0.9402889758122456, "grad_norm": 0.26344315864155177, "learning_rate": 1.864210319509674e-07, "loss": 0.1517, "step": 11779 }, { "epoch": 0.9403688033846891, "grad_norm": 0.29089022979908574, "learning_rate": 1.8592446250282449e-07, "loss": 0.1338, "step": 11780 }, { "epoch": 0.9404486309571326, "grad_norm": 0.26760676360846836, "learning_rate": 1.8542854908679464e-07, "loss": 0.1941, "step": 11781 }, { "epoch": 0.9405284585295761, "grad_norm": 0.2868293205914439, "learning_rate": 1.8493329173602581e-07, "loss": 0.1518, "step": 11782 }, { "epoch": 0.9406082861020196, "grad_norm": 0.2764348987765656, "learning_rate": 1.844386904836226e-07, "loss": 0.1516, "step": 11783 }, { "epoch": 0.9406881136744631, "grad_norm": 0.26121453666520084, "learning_rate": 1.8394474536265083e-07, "loss": 0.1617, "step": 11784 }, { "epoch": 0.9407679412469067, "grad_norm": 0.27979205673656804, "learning_rate": 1.8345145640612517e-07, "loss": 0.1283, "step": 11785 }, { "epoch": 0.9408477688193502, "grad_norm": 0.275584630495474, "learning_rate": 1.8295882364702145e-07, "loss": 0.1759, "step": 11786 }, { "epoch": 0.9409275963917937, "grad_norm": 0.2534068400824577, "learning_rate": 1.8246684711827e-07, "loss": 0.128, "step": 11787 }, { "epoch": 0.9410074239642372, "grad_norm": 0.25911906294377085, "learning_rate": 1.8197552685275898e-07, "loss": 0.1993, "step": 11788 }, { "epoch": 0.9410872515366808, "grad_norm": 0.3116847102909184, "learning_rate": 1.8148486288332766e-07, "loss": 0.1839, "step": 11789 }, { "epoch": 0.9411670791091243, "grad_norm": 0.2705949867396374, "learning_rate": 1.8099485524277872e-07, "loss": 0.099, "step": 11790 }, { "epoch": 0.9412469066815679, "grad_norm": 0.270887430031374, "learning_rate": 1.805055039638659e-07, "loss": 0.1458, "step": 11791 }, { "epoch": 0.9413267342540114, "grad_norm": 0.292832521226623, "learning_rate": 1.8001680907930085e-07, "loss": 0.1729, "step": 11792 }, { "epoch": 0.9414065618264549, "grad_norm": 0.2678073298050535, "learning_rate": 1.7952877062174966e-07, "loss": 0.1896, "step": 11793 }, { "epoch": 0.9414863893988984, "grad_norm": 0.2819652707202588, "learning_rate": 1.7904138862383513e-07, "loss": 0.1388, "step": 11794 }, { "epoch": 0.9415662169713419, "grad_norm": 0.2991974518563662, "learning_rate": 1.7855466311813896e-07, "loss": 0.1906, "step": 11795 }, { "epoch": 0.9416460445437854, "grad_norm": 0.32632232297325886, "learning_rate": 1.7806859413719624e-07, "loss": 0.1865, "step": 11796 }, { "epoch": 0.9417258721162289, "grad_norm": 0.3157629916078454, "learning_rate": 1.7758318171349876e-07, "loss": 0.1588, "step": 11797 }, { "epoch": 0.9418056996886724, "grad_norm": 0.3603238085899964, "learning_rate": 1.770984258794939e-07, "loss": 0.1453, "step": 11798 }, { "epoch": 0.941885527261116, "grad_norm": 0.28700044809136327, "learning_rate": 1.7661432666758572e-07, "loss": 0.1335, "step": 11799 }, { "epoch": 0.9419653548335595, "grad_norm": 0.2924130279635238, "learning_rate": 1.761308841101339e-07, "loss": 0.1492, "step": 11800 }, { "epoch": 0.942045182406003, "grad_norm": 0.2731512105758719, "learning_rate": 1.7564809823945595e-07, "loss": 0.1657, "step": 11801 }, { "epoch": 0.9421250099784465, "grad_norm": 0.30625094825331645, "learning_rate": 1.751659690878238e-07, "loss": 0.1792, "step": 11802 }, { "epoch": 0.9422048375508901, "grad_norm": 0.30316697692304606, "learning_rate": 1.7468449668746502e-07, "loss": 0.1645, "step": 11803 }, { "epoch": 0.9422846651233336, "grad_norm": 0.30915879495281945, "learning_rate": 1.7420368107056496e-07, "loss": 0.1705, "step": 11804 }, { "epoch": 0.9423644926957772, "grad_norm": 0.27885875000805443, "learning_rate": 1.737235222692646e-07, "loss": 0.1468, "step": 11805 }, { "epoch": 0.9424443202682207, "grad_norm": 0.2941535527732126, "learning_rate": 1.7324402031565935e-07, "loss": 0.1558, "step": 11806 }, { "epoch": 0.9425241478406642, "grad_norm": 0.23716995300548993, "learning_rate": 1.7276517524180247e-07, "loss": 0.1268, "step": 11807 }, { "epoch": 0.9426039754131077, "grad_norm": 0.25650056994982195, "learning_rate": 1.7228698707970283e-07, "loss": 0.1657, "step": 11808 }, { "epoch": 0.9426838029855512, "grad_norm": 0.30929242175660615, "learning_rate": 1.7180945586132703e-07, "loss": 0.1559, "step": 11809 }, { "epoch": 0.9427636305579947, "grad_norm": 0.3090359194556696, "learning_rate": 1.7133258161859402e-07, "loss": 0.1608, "step": 11810 }, { "epoch": 0.9428434581304382, "grad_norm": 0.3199276631269505, "learning_rate": 1.708563643833816e-07, "loss": 0.1866, "step": 11811 }, { "epoch": 0.9429232857028818, "grad_norm": 0.321069614142457, "learning_rate": 1.7038080418752433e-07, "loss": 0.1716, "step": 11812 }, { "epoch": 0.9430031132753253, "grad_norm": 0.29440946475436913, "learning_rate": 1.6990590106280791e-07, "loss": 0.166, "step": 11813 }, { "epoch": 0.9430829408477688, "grad_norm": 0.30603547304254036, "learning_rate": 1.6943165504098247e-07, "loss": 0.147, "step": 11814 }, { "epoch": 0.9431627684202123, "grad_norm": 0.3257658538910751, "learning_rate": 1.6895806615374598e-07, "loss": 0.1565, "step": 11815 }, { "epoch": 0.9432425959926558, "grad_norm": 0.25418509678928003, "learning_rate": 1.684851344327576e-07, "loss": 0.1327, "step": 11816 }, { "epoch": 0.9433224235650994, "grad_norm": 0.27043282651164186, "learning_rate": 1.680128599096309e-07, "loss": 0.1433, "step": 11817 }, { "epoch": 0.943402251137543, "grad_norm": 0.2814908715058157, "learning_rate": 1.6754124261593397e-07, "loss": 0.1524, "step": 11818 }, { "epoch": 0.9434820787099865, "grad_norm": 0.24566118525368213, "learning_rate": 1.670702825831949e-07, "loss": 0.1074, "step": 11819 }, { "epoch": 0.94356190628243, "grad_norm": 0.2922896710962474, "learning_rate": 1.665999798428941e-07, "loss": 0.1637, "step": 11820 }, { "epoch": 0.9436417338548735, "grad_norm": 0.26489848203612065, "learning_rate": 1.6613033442646975e-07, "loss": 0.1512, "step": 11821 }, { "epoch": 0.943721561427317, "grad_norm": 0.339873121632147, "learning_rate": 1.6566134636531562e-07, "loss": 0.17, "step": 11822 }, { "epoch": 0.9438013889997605, "grad_norm": 0.25350563700679124, "learning_rate": 1.6519301569078328e-07, "loss": 0.1949, "step": 11823 }, { "epoch": 0.943881216572204, "grad_norm": 0.2907007281319147, "learning_rate": 1.647253424341755e-07, "loss": 0.1894, "step": 11824 }, { "epoch": 0.9439610441446475, "grad_norm": 0.33442794385861335, "learning_rate": 1.642583266267561e-07, "loss": 0.1439, "step": 11825 }, { "epoch": 0.9440408717170911, "grad_norm": 0.2488076722202954, "learning_rate": 1.6379196829974352e-07, "loss": 0.1831, "step": 11826 }, { "epoch": 0.9441206992895346, "grad_norm": 0.30434886534455796, "learning_rate": 1.633262674843128e-07, "loss": 0.1805, "step": 11827 }, { "epoch": 0.9442005268619781, "grad_norm": 0.3284214713451295, "learning_rate": 1.6286122421159345e-07, "loss": 0.1865, "step": 11828 }, { "epoch": 0.9442803544344216, "grad_norm": 0.2790583735088625, "learning_rate": 1.6239683851267062e-07, "loss": 0.1641, "step": 11829 }, { "epoch": 0.9443601820068652, "grad_norm": 0.24362722361807657, "learning_rate": 1.6193311041858838e-07, "loss": 0.1226, "step": 11830 }, { "epoch": 0.9444400095793087, "grad_norm": 0.257220336132141, "learning_rate": 1.6147003996034304e-07, "loss": 0.1745, "step": 11831 }, { "epoch": 0.9445198371517523, "grad_norm": 0.22756096932696818, "learning_rate": 1.6100762716889207e-07, "loss": 0.1511, "step": 11832 }, { "epoch": 0.9445996647241958, "grad_norm": 0.3232684073795046, "learning_rate": 1.6054587207514183e-07, "loss": 0.1598, "step": 11833 }, { "epoch": 0.9446794922966393, "grad_norm": 0.2773002164777897, "learning_rate": 1.6008477470996319e-07, "loss": 0.1702, "step": 11834 }, { "epoch": 0.9447593198690828, "grad_norm": 0.3256812566078533, "learning_rate": 1.5962433510417597e-07, "loss": 0.1763, "step": 11835 }, { "epoch": 0.9448391474415263, "grad_norm": 0.2520873948066254, "learning_rate": 1.5916455328855996e-07, "loss": 0.1493, "step": 11836 }, { "epoch": 0.9449189750139698, "grad_norm": 0.30022622382923636, "learning_rate": 1.5870542929384837e-07, "loss": 0.1772, "step": 11837 }, { "epoch": 0.9449988025864133, "grad_norm": 0.35924378980242294, "learning_rate": 1.5824696315073219e-07, "loss": 0.1647, "step": 11838 }, { "epoch": 0.9450786301588568, "grad_norm": 0.2632654496272762, "learning_rate": 1.5778915488986024e-07, "loss": 0.1558, "step": 11839 }, { "epoch": 0.9451584577313004, "grad_norm": 0.28508647766565604, "learning_rate": 1.573320045418325e-07, "loss": 0.1298, "step": 11840 }, { "epoch": 0.9452382853037439, "grad_norm": 0.2542527718659091, "learning_rate": 1.5687551213720897e-07, "loss": 0.2022, "step": 11841 }, { "epoch": 0.9453181128761874, "grad_norm": 0.2772569736830849, "learning_rate": 1.5641967770650413e-07, "loss": 0.1982, "step": 11842 }, { "epoch": 0.9453979404486309, "grad_norm": 0.28836989389485906, "learning_rate": 1.5596450128018804e-07, "loss": 0.1689, "step": 11843 }, { "epoch": 0.9454777680210745, "grad_norm": 0.2543002717244677, "learning_rate": 1.5550998288868858e-07, "loss": 0.1809, "step": 11844 }, { "epoch": 0.945557595593518, "grad_norm": 0.2796809918653201, "learning_rate": 1.55056122562387e-07, "loss": 0.2133, "step": 11845 }, { "epoch": 0.9456374231659616, "grad_norm": 0.3277278688536216, "learning_rate": 1.546029203316246e-07, "loss": 0.1522, "step": 11846 }, { "epoch": 0.9457172507384051, "grad_norm": 0.24584482449224931, "learning_rate": 1.541503762266927e-07, "loss": 0.1341, "step": 11847 }, { "epoch": 0.9457970783108486, "grad_norm": 0.2813131598466475, "learning_rate": 1.536984902778449e-07, "loss": 0.1754, "step": 11848 }, { "epoch": 0.9458769058832921, "grad_norm": 0.26212244108068716, "learning_rate": 1.532472625152881e-07, "loss": 0.1343, "step": 11849 }, { "epoch": 0.9459567334557356, "grad_norm": 0.2822866528310148, "learning_rate": 1.5279669296918265e-07, "loss": 0.1395, "step": 11850 }, { "epoch": 0.9460365610281791, "grad_norm": 0.27885031248071507, "learning_rate": 1.5234678166965e-07, "loss": 0.1536, "step": 11851 }, { "epoch": 0.9461163886006226, "grad_norm": 0.29002644017171353, "learning_rate": 1.5189752864676165e-07, "loss": 0.1866, "step": 11852 }, { "epoch": 0.9461962161730662, "grad_norm": 0.27731886310517717, "learning_rate": 1.5144893393055248e-07, "loss": 0.1576, "step": 11853 }, { "epoch": 0.9462760437455097, "grad_norm": 0.26325113614024404, "learning_rate": 1.5100099755100627e-07, "loss": 0.1548, "step": 11854 }, { "epoch": 0.9463558713179532, "grad_norm": 0.24815825819182402, "learning_rate": 1.5055371953806685e-07, "loss": 0.1435, "step": 11855 }, { "epoch": 0.9464356988903967, "grad_norm": 0.25768894321920943, "learning_rate": 1.5010709992163254e-07, "loss": 0.152, "step": 11856 }, { "epoch": 0.9465155264628403, "grad_norm": 0.2920368498178466, "learning_rate": 1.496611387315583e-07, "loss": 0.1391, "step": 11857 }, { "epoch": 0.9465953540352838, "grad_norm": 0.25260753801736247, "learning_rate": 1.4921583599765588e-07, "loss": 0.1757, "step": 11858 }, { "epoch": 0.9466751816077273, "grad_norm": 0.25647614858222156, "learning_rate": 1.4877119174969034e-07, "loss": 0.1859, "step": 11859 }, { "epoch": 0.9467550091801709, "grad_norm": 0.26379089088389884, "learning_rate": 1.483272060173857e-07, "loss": 0.1417, "step": 11860 }, { "epoch": 0.9468348367526144, "grad_norm": 0.25721138059974563, "learning_rate": 1.4788387883041933e-07, "loss": 0.1567, "step": 11861 }, { "epoch": 0.9469146643250579, "grad_norm": 0.32958464603676957, "learning_rate": 1.474412102184275e-07, "loss": 0.1689, "step": 11862 }, { "epoch": 0.9469944918975014, "grad_norm": 0.24807080002388948, "learning_rate": 1.469992002109999e-07, "loss": 0.172, "step": 11863 }, { "epoch": 0.9470743194699449, "grad_norm": 0.3094265038016835, "learning_rate": 1.4655784883768288e-07, "loss": 0.1875, "step": 11864 }, { "epoch": 0.9471541470423884, "grad_norm": 0.3145231268552667, "learning_rate": 1.461171561279806e-07, "loss": 0.1786, "step": 11865 }, { "epoch": 0.9472339746148319, "grad_norm": 0.27008563914119654, "learning_rate": 1.4567712211134844e-07, "loss": 0.1692, "step": 11866 }, { "epoch": 0.9473138021872755, "grad_norm": 0.2872712290284148, "learning_rate": 1.4523774681720505e-07, "loss": 0.1769, "step": 11867 }, { "epoch": 0.947393629759719, "grad_norm": 0.2664778832419718, "learning_rate": 1.4479903027491693e-07, "loss": 0.1487, "step": 11868 }, { "epoch": 0.9474734573321625, "grad_norm": 0.30613748784989697, "learning_rate": 1.4436097251381398e-07, "loss": 0.1735, "step": 11869 }, { "epoch": 0.947553284904606, "grad_norm": 0.30851634811020484, "learning_rate": 1.43923573563175e-07, "loss": 0.1433, "step": 11870 }, { "epoch": 0.9476331124770496, "grad_norm": 0.3053005594677708, "learning_rate": 1.4348683345224325e-07, "loss": 0.1599, "step": 11871 }, { "epoch": 0.9477129400494931, "grad_norm": 0.30658207631320405, "learning_rate": 1.430507522102087e-07, "loss": 0.1469, "step": 11872 }, { "epoch": 0.9477927676219366, "grad_norm": 0.2572812957549869, "learning_rate": 1.4261532986622363e-07, "loss": 0.1644, "step": 11873 }, { "epoch": 0.9478725951943802, "grad_norm": 0.27568600701172746, "learning_rate": 1.421805664493936e-07, "loss": 0.2175, "step": 11874 }, { "epoch": 0.9479524227668237, "grad_norm": 0.26358745382950644, "learning_rate": 1.4174646198878096e-07, "loss": 0.1711, "step": 11875 }, { "epoch": 0.9480322503392672, "grad_norm": 0.3287316474208162, "learning_rate": 1.4131301651340578e-07, "loss": 0.1599, "step": 11876 }, { "epoch": 0.9481120779117107, "grad_norm": 0.27517375983660913, "learning_rate": 1.4088023005223828e-07, "loss": 0.1443, "step": 11877 }, { "epoch": 0.9481919054841542, "grad_norm": 0.29519480563548867, "learning_rate": 1.4044810263421194e-07, "loss": 0.1783, "step": 11878 }, { "epoch": 0.9482717330565977, "grad_norm": 0.3064289665751639, "learning_rate": 1.4001663428821032e-07, "loss": 0.1617, "step": 11879 }, { "epoch": 0.9483515606290412, "grad_norm": 0.2806509863528294, "learning_rate": 1.3958582504307815e-07, "loss": 0.1721, "step": 11880 }, { "epoch": 0.9484313882014848, "grad_norm": 0.3060120541102586, "learning_rate": 1.3915567492761128e-07, "loss": 0.1588, "step": 11881 }, { "epoch": 0.9485112157739283, "grad_norm": 0.28820149679632223, "learning_rate": 1.3872618397056336e-07, "loss": 0.1345, "step": 11882 }, { "epoch": 0.9485910433463718, "grad_norm": 0.2712528377391919, "learning_rate": 1.3829735220064478e-07, "loss": 0.1525, "step": 11883 }, { "epoch": 0.9486708709188154, "grad_norm": 0.28936231638996546, "learning_rate": 1.3786917964652147e-07, "loss": 0.1637, "step": 11884 }, { "epoch": 0.9487506984912589, "grad_norm": 0.29924883075806646, "learning_rate": 1.3744166633681611e-07, "loss": 0.169, "step": 11885 }, { "epoch": 0.9488305260637024, "grad_norm": 0.28737922939584504, "learning_rate": 1.370148123001036e-07, "loss": 0.1783, "step": 11886 }, { "epoch": 0.948910353636146, "grad_norm": 0.27602138697643647, "learning_rate": 1.3658861756491893e-07, "loss": 0.1633, "step": 11887 }, { "epoch": 0.9489901812085895, "grad_norm": 0.24167568940012826, "learning_rate": 1.361630821597526e-07, "loss": 0.1376, "step": 11888 }, { "epoch": 0.949070008781033, "grad_norm": 0.28795020175234326, "learning_rate": 1.3573820611304745e-07, "loss": 0.1244, "step": 11889 }, { "epoch": 0.9491498363534765, "grad_norm": 0.28728503095742686, "learning_rate": 1.353139894532074e-07, "loss": 0.1795, "step": 11890 }, { "epoch": 0.94922966392592, "grad_norm": 0.24516359206164706, "learning_rate": 1.3489043220858755e-07, "loss": 0.1465, "step": 11891 }, { "epoch": 0.9493094914983635, "grad_norm": 0.2818807642926014, "learning_rate": 1.3446753440750193e-07, "loss": 0.1133, "step": 11892 }, { "epoch": 0.949389319070807, "grad_norm": 0.33241612906775475, "learning_rate": 1.3404529607822015e-07, "loss": 0.1534, "step": 11893 }, { "epoch": 0.9494691466432505, "grad_norm": 0.27919648608424136, "learning_rate": 1.336237172489663e-07, "loss": 0.1606, "step": 11894 }, { "epoch": 0.9495489742156941, "grad_norm": 0.2744127331172802, "learning_rate": 1.3320279794792223e-07, "loss": 0.1403, "step": 11895 }, { "epoch": 0.9496288017881376, "grad_norm": 0.29196048825425647, "learning_rate": 1.3278253820322218e-07, "loss": 0.1795, "step": 11896 }, { "epoch": 0.9497086293605811, "grad_norm": 0.3291466232048431, "learning_rate": 1.323629380429625e-07, "loss": 0.1655, "step": 11897 }, { "epoch": 0.9497884569330247, "grad_norm": 0.3240524480663686, "learning_rate": 1.3194399749518972e-07, "loss": 0.2307, "step": 11898 }, { "epoch": 0.9498682845054682, "grad_norm": 0.32439142013890465, "learning_rate": 1.3152571658790803e-07, "loss": 0.1804, "step": 11899 }, { "epoch": 0.9499481120779117, "grad_norm": 0.3065817750496142, "learning_rate": 1.3110809534907844e-07, "loss": 0.1705, "step": 11900 }, { "epoch": 0.9500279396503553, "grad_norm": 0.29446909194154225, "learning_rate": 1.3069113380661745e-07, "loss": 0.17, "step": 11901 }, { "epoch": 0.9501077672227988, "grad_norm": 0.2802087878415715, "learning_rate": 1.302748319883973e-07, "loss": 0.1934, "step": 11902 }, { "epoch": 0.9501875947952423, "grad_norm": 0.26806855454085016, "learning_rate": 1.2985918992224677e-07, "loss": 0.1722, "step": 11903 }, { "epoch": 0.9502674223676858, "grad_norm": 0.24298042668904918, "learning_rate": 1.2944420763594812e-07, "loss": 0.219, "step": 11904 }, { "epoch": 0.9503472499401293, "grad_norm": 0.256091299108889, "learning_rate": 1.2902988515724136e-07, "loss": 0.1518, "step": 11905 }, { "epoch": 0.9504270775125728, "grad_norm": 0.28961507883656307, "learning_rate": 1.2861622251382432e-07, "loss": 0.1446, "step": 11906 }, { "epoch": 0.9505069050850163, "grad_norm": 0.23711292216239582, "learning_rate": 1.2820321973334826e-07, "loss": 0.1715, "step": 11907 }, { "epoch": 0.9505867326574599, "grad_norm": 0.2917853646000416, "learning_rate": 1.2779087684341885e-07, "loss": 0.1771, "step": 11908 }, { "epoch": 0.9506665602299034, "grad_norm": 0.31356341464061865, "learning_rate": 1.273791938716018e-07, "loss": 0.1807, "step": 11909 }, { "epoch": 0.9507463878023469, "grad_norm": 0.2597740142111035, "learning_rate": 1.26968170845414e-07, "loss": 0.1648, "step": 11910 }, { "epoch": 0.9508262153747905, "grad_norm": 0.32661260746123855, "learning_rate": 1.2655780779233463e-07, "loss": 0.1998, "step": 11911 }, { "epoch": 0.950906042947234, "grad_norm": 0.27374140826715293, "learning_rate": 1.2614810473979056e-07, "loss": 0.1458, "step": 11912 }, { "epoch": 0.9509858705196775, "grad_norm": 0.2951739917505739, "learning_rate": 1.2573906171517104e-07, "loss": 0.1649, "step": 11913 }, { "epoch": 0.951065698092121, "grad_norm": 0.31175042351994475, "learning_rate": 1.253306787458175e-07, "loss": 0.1309, "step": 11914 }, { "epoch": 0.9511455256645646, "grad_norm": 0.26913664086396544, "learning_rate": 1.2492295585903258e-07, "loss": 0.1683, "step": 11915 }, { "epoch": 0.9512253532370081, "grad_norm": 0.307042292246289, "learning_rate": 1.2451589308206557e-07, "loss": 0.1403, "step": 11916 }, { "epoch": 0.9513051808094516, "grad_norm": 0.2750989575642064, "learning_rate": 1.241094904421314e-07, "loss": 0.1257, "step": 11917 }, { "epoch": 0.9513850083818951, "grad_norm": 0.27695546007169725, "learning_rate": 1.2370374796639383e-07, "loss": 0.1435, "step": 11918 }, { "epoch": 0.9514648359543386, "grad_norm": 0.28547716622849806, "learning_rate": 1.232986656819757e-07, "loss": 0.184, "step": 11919 }, { "epoch": 0.9515446635267821, "grad_norm": 0.3228908219201273, "learning_rate": 1.2289424361595749e-07, "loss": 0.1205, "step": 11920 }, { "epoch": 0.9516244910992256, "grad_norm": 0.28820766970722644, "learning_rate": 1.224904817953687e-07, "loss": 0.1761, "step": 11921 }, { "epoch": 0.9517043186716692, "grad_norm": 0.2820394816070254, "learning_rate": 1.2208738024720334e-07, "loss": 0.1412, "step": 11922 }, { "epoch": 0.9517841462441127, "grad_norm": 0.254552347335202, "learning_rate": 1.2168493899840428e-07, "loss": 0.1817, "step": 11923 }, { "epoch": 0.9518639738165562, "grad_norm": 0.2653263325821175, "learning_rate": 1.2128315807587666e-07, "loss": 0.1717, "step": 11924 }, { "epoch": 0.9519438013889998, "grad_norm": 0.3106187345906151, "learning_rate": 1.2088203750647344e-07, "loss": 0.1427, "step": 11925 }, { "epoch": 0.9520236289614433, "grad_norm": 0.30405999242231163, "learning_rate": 1.2048157731701093e-07, "loss": 0.1336, "step": 11926 }, { "epoch": 0.9521034565338868, "grad_norm": 0.2344099444051499, "learning_rate": 1.2008177753425776e-07, "loss": 0.1382, "step": 11927 }, { "epoch": 0.9521832841063304, "grad_norm": 0.3090317105665658, "learning_rate": 1.196826381849381e-07, "loss": 0.1619, "step": 11928 }, { "epoch": 0.9522631116787739, "grad_norm": 0.23654883005489777, "learning_rate": 1.192841592957339e-07, "loss": 0.1513, "step": 11929 }, { "epoch": 0.9523429392512174, "grad_norm": 0.30418082685962067, "learning_rate": 1.1888634089328166e-07, "loss": 0.1294, "step": 11930 }, { "epoch": 0.9524227668236609, "grad_norm": 0.2785204864566731, "learning_rate": 1.1848918300417345e-07, "loss": 0.1361, "step": 11931 }, { "epoch": 0.9525025943961044, "grad_norm": 0.2415959667011634, "learning_rate": 1.1809268565495802e-07, "loss": 0.1488, "step": 11932 }, { "epoch": 0.9525824219685479, "grad_norm": 0.2838853854815163, "learning_rate": 1.1769684887213972e-07, "loss": 0.1644, "step": 11933 }, { "epoch": 0.9526622495409914, "grad_norm": 0.2453054610994088, "learning_rate": 1.1730167268217962e-07, "loss": 0.1525, "step": 11934 }, { "epoch": 0.952742077113435, "grad_norm": 0.28788168608850445, "learning_rate": 1.1690715711149103e-07, "loss": 0.1753, "step": 11935 }, { "epoch": 0.9528219046858785, "grad_norm": 0.27755030899866967, "learning_rate": 1.165133021864484e-07, "loss": 0.1593, "step": 11936 }, { "epoch": 0.952901732258322, "grad_norm": 0.2785585406294853, "learning_rate": 1.1612010793337847e-07, "loss": 0.1105, "step": 11937 }, { "epoch": 0.9529815598307656, "grad_norm": 0.2609914548077226, "learning_rate": 1.1572757437856464e-07, "loss": 0.2285, "step": 11938 }, { "epoch": 0.9530613874032091, "grad_norm": 0.2463051188174351, "learning_rate": 1.1533570154824592e-07, "loss": 0.1615, "step": 11939 }, { "epoch": 0.9531412149756526, "grad_norm": 0.2747008886960184, "learning_rate": 1.1494448946861692e-07, "loss": 0.1478, "step": 11940 }, { "epoch": 0.9532210425480961, "grad_norm": 0.24348134255890239, "learning_rate": 1.1455393816583005e-07, "loss": 0.1761, "step": 11941 }, { "epoch": 0.9533008701205397, "grad_norm": 0.29297673746038383, "learning_rate": 1.141640476659922e-07, "loss": 0.1718, "step": 11942 }, { "epoch": 0.9533806976929832, "grad_norm": 0.27412734521336, "learning_rate": 1.1377481799516476e-07, "loss": 0.143, "step": 11943 }, { "epoch": 0.9534605252654267, "grad_norm": 0.2842035533322958, "learning_rate": 1.1338624917936691e-07, "loss": 0.1531, "step": 11944 }, { "epoch": 0.9535403528378702, "grad_norm": 0.3112735696065244, "learning_rate": 1.1299834124457343e-07, "loss": 0.1798, "step": 11945 }, { "epoch": 0.9536201804103137, "grad_norm": 0.3045083445407985, "learning_rate": 1.1261109421671245e-07, "loss": 0.1517, "step": 11946 }, { "epoch": 0.9537000079827572, "grad_norm": 0.29802546909623945, "learning_rate": 1.1222450812167218e-07, "loss": 0.199, "step": 11947 }, { "epoch": 0.9537798355552007, "grad_norm": 0.2802961114064527, "learning_rate": 1.1183858298529415e-07, "loss": 0.1743, "step": 11948 }, { "epoch": 0.9538596631276443, "grad_norm": 0.2525631544338611, "learning_rate": 1.114533188333733e-07, "loss": 0.1331, "step": 11949 }, { "epoch": 0.9539394907000878, "grad_norm": 0.28190375572548476, "learning_rate": 1.1106871569166677e-07, "loss": 0.1961, "step": 11950 }, { "epoch": 0.9540193182725313, "grad_norm": 0.24331865681826226, "learning_rate": 1.106847735858807e-07, "loss": 0.1317, "step": 11951 }, { "epoch": 0.9540991458449749, "grad_norm": 0.2723337287815173, "learning_rate": 1.1030149254168232e-07, "loss": 0.1465, "step": 11952 }, { "epoch": 0.9541789734174184, "grad_norm": 0.2860260309015992, "learning_rate": 1.0991887258469003e-07, "loss": 0.1521, "step": 11953 }, { "epoch": 0.9542588009898619, "grad_norm": 0.29304822198267777, "learning_rate": 1.0953691374048225e-07, "loss": 0.2119, "step": 11954 }, { "epoch": 0.9543386285623054, "grad_norm": 0.3001637187043789, "learning_rate": 1.0915561603459301e-07, "loss": 0.1335, "step": 11955 }, { "epoch": 0.954418456134749, "grad_norm": 0.2576306383539726, "learning_rate": 1.0877497949250638e-07, "loss": 0.2146, "step": 11956 }, { "epoch": 0.9544982837071925, "grad_norm": 0.24597405422040391, "learning_rate": 1.0839500413966975e-07, "loss": 0.1402, "step": 11957 }, { "epoch": 0.954578111279636, "grad_norm": 0.27056362315466576, "learning_rate": 1.080156900014806e-07, "loss": 0.1415, "step": 11958 }, { "epoch": 0.9546579388520795, "grad_norm": 0.26861712868820475, "learning_rate": 1.0763703710329754e-07, "loss": 0.1409, "step": 11959 }, { "epoch": 0.954737766424523, "grad_norm": 0.2936166121905875, "learning_rate": 1.0725904547042921e-07, "loss": 0.1751, "step": 11960 }, { "epoch": 0.9548175939969665, "grad_norm": 0.2494576887138649, "learning_rate": 1.0688171512814316e-07, "loss": 0.1376, "step": 11961 }, { "epoch": 0.95489742156941, "grad_norm": 0.27418030274103056, "learning_rate": 1.0650504610166479e-07, "loss": 0.1586, "step": 11962 }, { "epoch": 0.9549772491418536, "grad_norm": 0.29410698379016725, "learning_rate": 1.0612903841617061e-07, "loss": 0.1503, "step": 11963 }, { "epoch": 0.9550570767142971, "grad_norm": 0.26198093043795323, "learning_rate": 1.057536920967961e-07, "loss": 0.1395, "step": 11964 }, { "epoch": 0.9551369042867407, "grad_norm": 0.2864671096567775, "learning_rate": 1.0537900716863114e-07, "loss": 0.1708, "step": 11965 }, { "epoch": 0.9552167318591842, "grad_norm": 0.3005567990956585, "learning_rate": 1.0500498365672351e-07, "loss": 0.1528, "step": 11966 }, { "epoch": 0.9552965594316277, "grad_norm": 0.2804693466775698, "learning_rate": 1.046316215860721e-07, "loss": 0.169, "step": 11967 }, { "epoch": 0.9553763870040712, "grad_norm": 0.26141960676843123, "learning_rate": 1.0425892098163914e-07, "loss": 0.1615, "step": 11968 }, { "epoch": 0.9554562145765148, "grad_norm": 0.30730715494195354, "learning_rate": 1.0388688186833473e-07, "loss": 0.1253, "step": 11969 }, { "epoch": 0.9555360421489583, "grad_norm": 0.26606835700976605, "learning_rate": 1.0351550427102786e-07, "loss": 0.1391, "step": 11970 }, { "epoch": 0.9556158697214018, "grad_norm": 0.2807957258321732, "learning_rate": 1.0314478821454643e-07, "loss": 0.1276, "step": 11971 }, { "epoch": 0.9556956972938453, "grad_norm": 0.29072152455131267, "learning_rate": 1.0277473372366952e-07, "loss": 0.0973, "step": 11972 }, { "epoch": 0.9557755248662888, "grad_norm": 0.28079630982953224, "learning_rate": 1.0240534082313403e-07, "loss": 0.1982, "step": 11973 }, { "epoch": 0.9558553524387323, "grad_norm": 0.2821601936690328, "learning_rate": 1.0203660953763128e-07, "loss": 0.1491, "step": 11974 }, { "epoch": 0.9559351800111758, "grad_norm": 0.2694870240642593, "learning_rate": 1.0166853989181268e-07, "loss": 0.1317, "step": 11975 }, { "epoch": 0.9560150075836193, "grad_norm": 0.282362606938501, "learning_rate": 1.0130113191027968e-07, "loss": 0.1466, "step": 11976 }, { "epoch": 0.9560948351560629, "grad_norm": 0.29845707390374254, "learning_rate": 1.0093438561759261e-07, "loss": 0.1542, "step": 11977 }, { "epoch": 0.9561746627285064, "grad_norm": 0.27492207421501524, "learning_rate": 1.0056830103826742e-07, "loss": 0.1664, "step": 11978 }, { "epoch": 0.95625449030095, "grad_norm": 0.2586406947874029, "learning_rate": 1.0020287819677455e-07, "loss": 0.1813, "step": 11979 }, { "epoch": 0.9563343178733935, "grad_norm": 0.2861435808999329, "learning_rate": 9.98381171175422e-08, "loss": 0.1377, "step": 11980 }, { "epoch": 0.956414145445837, "grad_norm": 0.2941247584934626, "learning_rate": 9.947401782495203e-08, "loss": 0.1731, "step": 11981 }, { "epoch": 0.9564939730182805, "grad_norm": 0.31579373375821784, "learning_rate": 9.911058034334342e-08, "loss": 0.143, "step": 11982 }, { "epoch": 0.956573800590724, "grad_norm": 0.2948096074032676, "learning_rate": 9.874780469701028e-08, "loss": 0.1492, "step": 11983 }, { "epoch": 0.9566536281631676, "grad_norm": 0.3126029332248486, "learning_rate": 9.838569091020323e-08, "loss": 0.1409, "step": 11984 }, { "epoch": 0.9567334557356111, "grad_norm": 0.2950323263963206, "learning_rate": 9.802423900712732e-08, "loss": 0.158, "step": 11985 }, { "epoch": 0.9568132833080546, "grad_norm": 0.30923896317334937, "learning_rate": 9.766344901194546e-08, "loss": 0.1919, "step": 11986 }, { "epoch": 0.9568931108804981, "grad_norm": 0.29145039071802853, "learning_rate": 9.730332094877393e-08, "loss": 0.1634, "step": 11987 }, { "epoch": 0.9569729384529416, "grad_norm": 0.3212205650796668, "learning_rate": 9.694385484168678e-08, "loss": 0.1757, "step": 11988 }, { "epoch": 0.9570527660253851, "grad_norm": 0.31818425060518146, "learning_rate": 9.658505071471147e-08, "loss": 0.1549, "step": 11989 }, { "epoch": 0.9571325935978287, "grad_norm": 0.2907083405281858, "learning_rate": 9.622690859183436e-08, "loss": 0.1676, "step": 11990 }, { "epoch": 0.9572124211702722, "grad_norm": 0.26464036492914317, "learning_rate": 9.58694284969941e-08, "loss": 0.1621, "step": 11991 }, { "epoch": 0.9572922487427157, "grad_norm": 0.3143723006530978, "learning_rate": 9.551261045408932e-08, "loss": 0.1595, "step": 11992 }, { "epoch": 0.9573720763151593, "grad_norm": 0.25974578095606227, "learning_rate": 9.515645448696876e-08, "loss": 0.1818, "step": 11993 }, { "epoch": 0.9574519038876028, "grad_norm": 0.31325820007586136, "learning_rate": 9.480096061944221e-08, "loss": 0.1382, "step": 11994 }, { "epoch": 0.9575317314600463, "grad_norm": 0.3110146804379609, "learning_rate": 9.444612887527182e-08, "loss": 0.135, "step": 11995 }, { "epoch": 0.9576115590324898, "grad_norm": 0.28797550300515345, "learning_rate": 9.40919592781775e-08, "loss": 0.155, "step": 11996 }, { "epoch": 0.9576913866049334, "grad_norm": 0.30908416042674397, "learning_rate": 9.373845185183361e-08, "loss": 0.1867, "step": 11997 }, { "epoch": 0.9577712141773769, "grad_norm": 0.3084950521485922, "learning_rate": 9.338560661987128e-08, "loss": 0.1525, "step": 11998 }, { "epoch": 0.9578510417498204, "grad_norm": 0.2992108238065566, "learning_rate": 9.30334236058772e-08, "loss": 0.1856, "step": 11999 }, { "epoch": 0.9579308693222639, "grad_norm": 0.32102132815997303, "learning_rate": 9.268190283339251e-08, "loss": 0.1293, "step": 12000 }, { "epoch": 0.9580106968947074, "grad_norm": 0.31858689350029534, "learning_rate": 9.2331044325914e-08, "loss": 0.1722, "step": 12001 }, { "epoch": 0.9580905244671509, "grad_norm": 0.2970349525999518, "learning_rate": 9.198084810689733e-08, "loss": 0.1596, "step": 12002 }, { "epoch": 0.9581703520395944, "grad_norm": 0.24639595857404278, "learning_rate": 9.163131419975047e-08, "loss": 0.1392, "step": 12003 }, { "epoch": 0.958250179612038, "grad_norm": 0.2771480869106839, "learning_rate": 9.128244262783802e-08, "loss": 0.2011, "step": 12004 }, { "epoch": 0.9583300071844815, "grad_norm": 0.2782063289873689, "learning_rate": 9.093423341448248e-08, "loss": 0.1532, "step": 12005 }, { "epoch": 0.9584098347569251, "grad_norm": 0.25612452052410756, "learning_rate": 9.058668658295856e-08, "loss": 0.1018, "step": 12006 }, { "epoch": 0.9584896623293686, "grad_norm": 0.2845948415654336, "learning_rate": 9.023980215649875e-08, "loss": 0.1284, "step": 12007 }, { "epoch": 0.9585694899018121, "grad_norm": 0.2987269007944771, "learning_rate": 8.989358015829231e-08, "loss": 0.1452, "step": 12008 }, { "epoch": 0.9586493174742556, "grad_norm": 0.26562801302088723, "learning_rate": 8.95480206114796e-08, "loss": 0.1494, "step": 12009 }, { "epoch": 0.9587291450466991, "grad_norm": 0.2830797197155413, "learning_rate": 8.920312353916328e-08, "loss": 0.1427, "step": 12010 }, { "epoch": 0.9588089726191427, "grad_norm": 0.3085081259528457, "learning_rate": 8.885888896439487e-08, "loss": 0.1735, "step": 12011 }, { "epoch": 0.9588888001915862, "grad_norm": 0.29491209259042195, "learning_rate": 8.851531691019044e-08, "loss": 0.1847, "step": 12012 }, { "epoch": 0.9589686277640297, "grad_norm": 0.29105124345113853, "learning_rate": 8.81724073995105e-08, "loss": 0.1598, "step": 12013 }, { "epoch": 0.9590484553364732, "grad_norm": 0.3006620173804023, "learning_rate": 8.783016045528115e-08, "loss": 0.1515, "step": 12014 }, { "epoch": 0.9591282829089167, "grad_norm": 0.3017026239669039, "learning_rate": 8.748857610037853e-08, "loss": 0.2007, "step": 12015 }, { "epoch": 0.9592081104813602, "grad_norm": 0.281075028560871, "learning_rate": 8.714765435763662e-08, "loss": 0.139, "step": 12016 }, { "epoch": 0.9592879380538037, "grad_norm": 0.2753609661363463, "learning_rate": 8.680739524984494e-08, "loss": 0.2053, "step": 12017 }, { "epoch": 0.9593677656262473, "grad_norm": 0.2720984007430241, "learning_rate": 8.646779879974754e-08, "loss": 0.1794, "step": 12018 }, { "epoch": 0.9594475931986908, "grad_norm": 0.2902004538837835, "learning_rate": 8.612886503004514e-08, "loss": 0.1444, "step": 12019 }, { "epoch": 0.9595274207711344, "grad_norm": 0.25752243845019823, "learning_rate": 8.579059396339628e-08, "loss": 0.156, "step": 12020 }, { "epoch": 0.9596072483435779, "grad_norm": 0.2671518165431927, "learning_rate": 8.545298562241067e-08, "loss": 0.1828, "step": 12021 }, { "epoch": 0.9596870759160214, "grad_norm": 0.2504670928267856, "learning_rate": 8.5116040029658e-08, "loss": 0.2198, "step": 12022 }, { "epoch": 0.9597669034884649, "grad_norm": 0.2610554363850732, "learning_rate": 8.477975720765918e-08, "loss": 0.1726, "step": 12023 }, { "epoch": 0.9598467310609085, "grad_norm": 0.33648033949843226, "learning_rate": 8.44441371788951e-08, "loss": 0.1541, "step": 12024 }, { "epoch": 0.959926558633352, "grad_norm": 0.31066937385849475, "learning_rate": 8.410917996580114e-08, "loss": 0.1934, "step": 12025 }, { "epoch": 0.9600063862057955, "grad_norm": 0.33989571900814797, "learning_rate": 8.377488559076718e-08, "loss": 0.2209, "step": 12026 }, { "epoch": 0.960086213778239, "grad_norm": 0.3048900958359382, "learning_rate": 8.344125407613867e-08, "loss": 0.1445, "step": 12027 }, { "epoch": 0.9601660413506825, "grad_norm": 0.3045641790495211, "learning_rate": 8.310828544421889e-08, "loss": 0.1292, "step": 12028 }, { "epoch": 0.960245868923126, "grad_norm": 0.2950053822752833, "learning_rate": 8.277597971726447e-08, "loss": 0.1623, "step": 12029 }, { "epoch": 0.9603256964955695, "grad_norm": 0.3016341715571078, "learning_rate": 8.244433691748987e-08, "loss": 0.1851, "step": 12030 }, { "epoch": 0.960405524068013, "grad_norm": 0.2876047396096596, "learning_rate": 8.211335706706292e-08, "loss": 0.1591, "step": 12031 }, { "epoch": 0.9604853516404566, "grad_norm": 0.30119530510155373, "learning_rate": 8.178304018810923e-08, "loss": 0.149, "step": 12032 }, { "epoch": 0.9605651792129002, "grad_norm": 0.25167818397409986, "learning_rate": 8.145338630270782e-08, "loss": 0.1597, "step": 12033 }, { "epoch": 0.9606450067853437, "grad_norm": 0.3291484833392964, "learning_rate": 8.112439543289663e-08, "loss": 0.1377, "step": 12034 }, { "epoch": 0.9607248343577872, "grad_norm": 0.27970456526917636, "learning_rate": 8.079606760066583e-08, "loss": 0.2073, "step": 12035 }, { "epoch": 0.9608046619302307, "grad_norm": 0.26150817163237866, "learning_rate": 8.046840282796342e-08, "loss": 0.1687, "step": 12036 }, { "epoch": 0.9608844895026742, "grad_norm": 0.2577522847291521, "learning_rate": 8.0141401136693e-08, "loss": 0.1633, "step": 12037 }, { "epoch": 0.9609643170751178, "grad_norm": 0.28243928213935854, "learning_rate": 7.981506254871152e-08, "loss": 0.1893, "step": 12038 }, { "epoch": 0.9610441446475613, "grad_norm": 0.27345029557988565, "learning_rate": 7.948938708583598e-08, "loss": 0.1515, "step": 12039 }, { "epoch": 0.9611239722200048, "grad_norm": 0.3139707256555082, "learning_rate": 7.916437476983562e-08, "loss": 0.1227, "step": 12040 }, { "epoch": 0.9612037997924483, "grad_norm": 0.30951258233860274, "learning_rate": 7.88400256224342e-08, "loss": 0.1621, "step": 12041 }, { "epoch": 0.9612836273648918, "grad_norm": 0.2889392924162159, "learning_rate": 7.851633966531657e-08, "loss": 0.1626, "step": 12042 }, { "epoch": 0.9613634549373353, "grad_norm": 0.2946204529400796, "learning_rate": 7.819331692011766e-08, "loss": 0.1425, "step": 12043 }, { "epoch": 0.9614432825097788, "grad_norm": 0.2825725618213703, "learning_rate": 7.787095740843021e-08, "loss": 0.1644, "step": 12044 }, { "epoch": 0.9615231100822224, "grad_norm": 0.27725301920059947, "learning_rate": 7.754926115180361e-08, "loss": 0.1278, "step": 12045 }, { "epoch": 0.9616029376546659, "grad_norm": 0.3112035874853183, "learning_rate": 7.722822817174181e-08, "loss": 0.1502, "step": 12046 }, { "epoch": 0.9616827652271095, "grad_norm": 0.31861872267216207, "learning_rate": 7.690785848970428e-08, "loss": 0.1876, "step": 12047 }, { "epoch": 0.961762592799553, "grad_norm": 0.2449153896008318, "learning_rate": 7.658815212710613e-08, "loss": 0.1122, "step": 12048 }, { "epoch": 0.9618424203719965, "grad_norm": 0.28194046241250253, "learning_rate": 7.626910910531915e-08, "loss": 0.201, "step": 12049 }, { "epoch": 0.96192224794444, "grad_norm": 0.2745996422914213, "learning_rate": 7.595072944566961e-08, "loss": 0.1455, "step": 12050 }, { "epoch": 0.9620020755168835, "grad_norm": 0.27712861116491566, "learning_rate": 7.563301316944049e-08, "loss": 0.1707, "step": 12051 }, { "epoch": 0.9620819030893271, "grad_norm": 0.2736751865724952, "learning_rate": 7.531596029786925e-08, "loss": 0.173, "step": 12052 }, { "epoch": 0.9621617306617706, "grad_norm": 0.24458783463497663, "learning_rate": 7.499957085215004e-08, "loss": 0.149, "step": 12053 }, { "epoch": 0.9622415582342141, "grad_norm": 0.2565981238716068, "learning_rate": 7.46838448534315e-08, "loss": 0.1321, "step": 12054 }, { "epoch": 0.9623213858066576, "grad_norm": 0.3010914221735452, "learning_rate": 7.436878232282007e-08, "loss": 0.1911, "step": 12055 }, { "epoch": 0.9624012133791011, "grad_norm": 0.2836595369752904, "learning_rate": 7.405438328137559e-08, "loss": 0.1713, "step": 12056 }, { "epoch": 0.9624810409515446, "grad_norm": 0.3320825417528032, "learning_rate": 7.374064775011347e-08, "loss": 0.1766, "step": 12057 }, { "epoch": 0.9625608685239881, "grad_norm": 0.2507128762573729, "learning_rate": 7.342757575000803e-08, "loss": 0.1467, "step": 12058 }, { "epoch": 0.9626406960964317, "grad_norm": 0.2654511790591342, "learning_rate": 7.311516730198476e-08, "loss": 0.1323, "step": 12059 }, { "epoch": 0.9627205236688753, "grad_norm": 0.24585864945202632, "learning_rate": 7.280342242692806e-08, "loss": 0.1896, "step": 12060 }, { "epoch": 0.9628003512413188, "grad_norm": 0.2789049762007264, "learning_rate": 7.249234114567572e-08, "loss": 0.1154, "step": 12061 }, { "epoch": 0.9628801788137623, "grad_norm": 0.2861005037323244, "learning_rate": 7.218192347902331e-08, "loss": 0.129, "step": 12062 }, { "epoch": 0.9629600063862058, "grad_norm": 0.2861441296807495, "learning_rate": 7.18721694477209e-08, "loss": 0.1687, "step": 12063 }, { "epoch": 0.9630398339586493, "grad_norm": 0.3121211296011583, "learning_rate": 7.156307907247418e-08, "loss": 0.1459, "step": 12064 }, { "epoch": 0.9631196615310929, "grad_norm": 0.2461006628203045, "learning_rate": 7.125465237394547e-08, "loss": 0.1395, "step": 12065 }, { "epoch": 0.9631994891035364, "grad_norm": 0.30787666638183875, "learning_rate": 7.094688937275052e-08, "loss": 0.1669, "step": 12066 }, { "epoch": 0.9632793166759799, "grad_norm": 0.28771934767639945, "learning_rate": 7.063979008946176e-08, "loss": 0.1344, "step": 12067 }, { "epoch": 0.9633591442484234, "grad_norm": 0.2684993305670852, "learning_rate": 7.033335454460833e-08, "loss": 0.1602, "step": 12068 }, { "epoch": 0.9634389718208669, "grad_norm": 0.2722157684598677, "learning_rate": 7.002758275867493e-08, "loss": 0.1509, "step": 12069 }, { "epoch": 0.9635187993933104, "grad_norm": 0.3125703360654703, "learning_rate": 6.972247475210081e-08, "loss": 0.1555, "step": 12070 }, { "epoch": 0.9635986269657539, "grad_norm": 0.24805129618434957, "learning_rate": 6.941803054527963e-08, "loss": 0.1879, "step": 12071 }, { "epoch": 0.9636784545381974, "grad_norm": 0.26560452978310517, "learning_rate": 6.91142501585651e-08, "loss": 0.1324, "step": 12072 }, { "epoch": 0.963758282110641, "grad_norm": 0.26264199598647514, "learning_rate": 6.8811133612261e-08, "loss": 0.17, "step": 12073 }, { "epoch": 0.9638381096830846, "grad_norm": 0.26898528249724046, "learning_rate": 6.85086809266311e-08, "loss": 0.1222, "step": 12074 }, { "epoch": 0.9639179372555281, "grad_norm": 0.3253607339499777, "learning_rate": 6.820689212189368e-08, "loss": 0.1715, "step": 12075 }, { "epoch": 0.9639977648279716, "grad_norm": 0.2820121625827287, "learning_rate": 6.79057672182204e-08, "loss": 0.1429, "step": 12076 }, { "epoch": 0.9640775924004151, "grad_norm": 0.2631825124815454, "learning_rate": 6.760530623574069e-08, "loss": 0.1385, "step": 12077 }, { "epoch": 0.9641574199728586, "grad_norm": 0.2639355542436899, "learning_rate": 6.73055091945407e-08, "loss": 0.1752, "step": 12078 }, { "epoch": 0.9642372475453022, "grad_norm": 0.26625731521822293, "learning_rate": 6.700637611465888e-08, "loss": 0.1199, "step": 12079 }, { "epoch": 0.9643170751177457, "grad_norm": 0.23414228307168933, "learning_rate": 6.670790701609143e-08, "loss": 0.1628, "step": 12080 }, { "epoch": 0.9643969026901892, "grad_norm": 0.2935568841400159, "learning_rate": 6.641010191879016e-08, "loss": 0.2061, "step": 12081 }, { "epoch": 0.9644767302626327, "grad_norm": 0.276887263419805, "learning_rate": 6.611296084266361e-08, "loss": 0.1553, "step": 12082 }, { "epoch": 0.9645565578350762, "grad_norm": 0.2891851696572337, "learning_rate": 6.581648380757145e-08, "loss": 0.127, "step": 12083 }, { "epoch": 0.9646363854075197, "grad_norm": 0.3078167822565123, "learning_rate": 6.552067083333447e-08, "loss": 0.146, "step": 12084 }, { "epoch": 0.9647162129799632, "grad_norm": 0.2627296028430193, "learning_rate": 6.522552193972465e-08, "loss": 0.1757, "step": 12085 }, { "epoch": 0.9647960405524068, "grad_norm": 0.26145536580832274, "learning_rate": 6.493103714647397e-08, "loss": 0.1455, "step": 12086 }, { "epoch": 0.9648758681248504, "grad_norm": 0.3142062200851704, "learning_rate": 6.463721647326559e-08, "loss": 0.158, "step": 12087 }, { "epoch": 0.9649556956972939, "grad_norm": 0.2709271418333148, "learning_rate": 6.434405993974046e-08, "loss": 0.1503, "step": 12088 }, { "epoch": 0.9650355232697374, "grad_norm": 0.2778169129718002, "learning_rate": 6.405156756549624e-08, "loss": 0.1635, "step": 12089 }, { "epoch": 0.9651153508421809, "grad_norm": 0.316841870401451, "learning_rate": 6.375973937008285e-08, "loss": 0.1421, "step": 12090 }, { "epoch": 0.9651951784146244, "grad_norm": 0.2914817913864536, "learning_rate": 6.34685753730102e-08, "loss": 0.1607, "step": 12091 }, { "epoch": 0.965275005987068, "grad_norm": 0.3005010718154539, "learning_rate": 6.317807559373945e-08, "loss": 0.1336, "step": 12092 }, { "epoch": 0.9653548335595115, "grad_norm": 0.30659124801162957, "learning_rate": 6.288824005169058e-08, "loss": 0.1815, "step": 12093 }, { "epoch": 0.965434661131955, "grad_norm": 0.26813287537504715, "learning_rate": 6.2599068766237e-08, "loss": 0.1322, "step": 12094 }, { "epoch": 0.9655144887043985, "grad_norm": 0.27285153958362146, "learning_rate": 6.231056175670991e-08, "loss": 0.2055, "step": 12095 }, { "epoch": 0.965594316276842, "grad_norm": 0.2906867894067029, "learning_rate": 6.202271904239388e-08, "loss": 0.1793, "step": 12096 }, { "epoch": 0.9656741438492855, "grad_norm": 0.25579547778143064, "learning_rate": 6.173554064252907e-08, "loss": 0.1406, "step": 12097 }, { "epoch": 0.965753971421729, "grad_norm": 0.32711577575210304, "learning_rate": 6.144902657631457e-08, "loss": 0.1735, "step": 12098 }, { "epoch": 0.9658337989941725, "grad_norm": 0.2639882056689943, "learning_rate": 6.116317686290175e-08, "loss": 0.1586, "step": 12099 }, { "epoch": 0.9659136265666161, "grad_norm": 0.2794140872375548, "learning_rate": 6.087799152139861e-08, "loss": 0.1246, "step": 12100 }, { "epoch": 0.9659934541390597, "grad_norm": 0.3036545217593543, "learning_rate": 6.059347057086773e-08, "loss": 0.1904, "step": 12101 }, { "epoch": 0.9660732817115032, "grad_norm": 0.3359140432542421, "learning_rate": 6.030961403032831e-08, "loss": 0.1642, "step": 12102 }, { "epoch": 0.9661531092839467, "grad_norm": 0.2535972542548619, "learning_rate": 6.002642191875741e-08, "loss": 0.1853, "step": 12103 }, { "epoch": 0.9662329368563902, "grad_norm": 0.27230045124343416, "learning_rate": 5.974389425508209e-08, "loss": 0.1455, "step": 12104 }, { "epoch": 0.9663127644288337, "grad_norm": 0.26328038428169165, "learning_rate": 5.9462031058190594e-08, "loss": 0.1504, "step": 12105 }, { "epoch": 0.9663925920012773, "grad_norm": 0.34007206330803663, "learning_rate": 5.918083234692229e-08, "loss": 0.1432, "step": 12106 }, { "epoch": 0.9664724195737208, "grad_norm": 0.2902674188235741, "learning_rate": 5.8900298140076585e-08, "loss": 0.1701, "step": 12107 }, { "epoch": 0.9665522471461643, "grad_norm": 0.3024032879921754, "learning_rate": 5.862042845640403e-08, "loss": 0.1629, "step": 12108 }, { "epoch": 0.9666320747186078, "grad_norm": 0.2724884040233477, "learning_rate": 5.834122331461412e-08, "loss": 0.2018, "step": 12109 }, { "epoch": 0.9667119022910513, "grad_norm": 0.2619466346626353, "learning_rate": 5.806268273336968e-08, "loss": 0.1501, "step": 12110 }, { "epoch": 0.9667917298634948, "grad_norm": 0.2955502583289476, "learning_rate": 5.778480673129028e-08, "loss": 0.1826, "step": 12111 }, { "epoch": 0.9668715574359383, "grad_norm": 0.2773731601683169, "learning_rate": 5.7507595326951046e-08, "loss": 0.1432, "step": 12112 }, { "epoch": 0.9669513850083818, "grad_norm": 0.278308215554228, "learning_rate": 5.7231048538882725e-08, "loss": 0.1625, "step": 12113 }, { "epoch": 0.9670312125808255, "grad_norm": 0.26083730287674145, "learning_rate": 5.695516638557164e-08, "loss": 0.1286, "step": 12114 }, { "epoch": 0.967111040153269, "grad_norm": 0.255148068957944, "learning_rate": 5.667994888545747e-08, "loss": 0.1228, "step": 12115 }, { "epoch": 0.9671908677257125, "grad_norm": 0.2501001313996353, "learning_rate": 5.640539605693995e-08, "loss": 0.1596, "step": 12116 }, { "epoch": 0.967270695298156, "grad_norm": 0.2857402601106407, "learning_rate": 5.6131507918369964e-08, "loss": 0.1337, "step": 12117 }, { "epoch": 0.9673505228705995, "grad_norm": 0.25850152723806863, "learning_rate": 5.58582844880573e-08, "loss": 0.1649, "step": 12118 }, { "epoch": 0.967430350443043, "grad_norm": 0.3057472317586037, "learning_rate": 5.558572578426513e-08, "loss": 0.1891, "step": 12119 }, { "epoch": 0.9675101780154866, "grad_norm": 0.24343803219621327, "learning_rate": 5.531383182521222e-08, "loss": 0.1274, "step": 12120 }, { "epoch": 0.9675900055879301, "grad_norm": 0.279748737969983, "learning_rate": 5.504260262907513e-08, "loss": 0.1402, "step": 12121 }, { "epoch": 0.9676698331603736, "grad_norm": 0.2920300694121043, "learning_rate": 5.477203821398269e-08, "loss": 0.2426, "step": 12122 }, { "epoch": 0.9677496607328171, "grad_norm": 0.26646953519140953, "learning_rate": 5.450213859802267e-08, "loss": 0.1584, "step": 12123 }, { "epoch": 0.9678294883052606, "grad_norm": 0.2369170580359399, "learning_rate": 5.423290379923618e-08, "loss": 0.1794, "step": 12124 }, { "epoch": 0.9679093158777041, "grad_norm": 0.2690994161586298, "learning_rate": 5.396433383561994e-08, "loss": 0.2336, "step": 12125 }, { "epoch": 0.9679891434501476, "grad_norm": 0.30147697953562985, "learning_rate": 5.369642872512848e-08, "loss": 0.1779, "step": 12126 }, { "epoch": 0.9680689710225912, "grad_norm": 0.26053847248254314, "learning_rate": 5.342918848566747e-08, "loss": 0.1631, "step": 12127 }, { "epoch": 0.9681487985950348, "grad_norm": 0.27776089088009787, "learning_rate": 5.316261313510374e-08, "loss": 0.1801, "step": 12128 }, { "epoch": 0.9682286261674783, "grad_norm": 0.2691183170784815, "learning_rate": 5.289670269125413e-08, "loss": 0.152, "step": 12129 }, { "epoch": 0.9683084537399218, "grad_norm": 0.26176348658267024, "learning_rate": 5.263145717189666e-08, "loss": 0.1378, "step": 12130 }, { "epoch": 0.9683882813123653, "grad_norm": 0.2533440726998443, "learning_rate": 5.2366876594759366e-08, "loss": 0.1933, "step": 12131 }, { "epoch": 0.9684681088848088, "grad_norm": 0.27248430191991097, "learning_rate": 5.210296097753032e-08, "loss": 0.1476, "step": 12132 }, { "epoch": 0.9685479364572523, "grad_norm": 0.28487150114927556, "learning_rate": 5.183971033784874e-08, "loss": 0.2365, "step": 12133 }, { "epoch": 0.9686277640296959, "grad_norm": 0.2972919160858522, "learning_rate": 5.1577124693313883e-08, "loss": 0.1216, "step": 12134 }, { "epoch": 0.9687075916021394, "grad_norm": 0.27840193269603525, "learning_rate": 5.1315204061479496e-08, "loss": 0.1905, "step": 12135 }, { "epoch": 0.9687874191745829, "grad_norm": 0.2716937883183753, "learning_rate": 5.1053948459851565e-08, "loss": 0.1288, "step": 12136 }, { "epoch": 0.9688672467470264, "grad_norm": 0.2629579694360365, "learning_rate": 5.0793357905895015e-08, "loss": 0.2014, "step": 12137 }, { "epoch": 0.9689470743194699, "grad_norm": 0.28029446162297367, "learning_rate": 5.053343241702924e-08, "loss": 0.1524, "step": 12138 }, { "epoch": 0.9690269018919134, "grad_norm": 0.2592901589055319, "learning_rate": 5.027417201062923e-08, "loss": 0.1488, "step": 12139 }, { "epoch": 0.9691067294643569, "grad_norm": 0.33008712755584974, "learning_rate": 5.001557670402557e-08, "loss": 0.1376, "step": 12140 }, { "epoch": 0.9691865570368005, "grad_norm": 0.32422532143476623, "learning_rate": 4.975764651450443e-08, "loss": 0.1559, "step": 12141 }, { "epoch": 0.9692663846092441, "grad_norm": 0.2793715931463242, "learning_rate": 4.950038145930758e-08, "loss": 0.1765, "step": 12142 }, { "epoch": 0.9693462121816876, "grad_norm": 0.2787461604913332, "learning_rate": 4.9243781555631253e-08, "loss": 0.1881, "step": 12143 }, { "epoch": 0.9694260397541311, "grad_norm": 0.25782286128702064, "learning_rate": 4.898784682062951e-08, "loss": 0.1385, "step": 12144 }, { "epoch": 0.9695058673265746, "grad_norm": 0.26406577431849604, "learning_rate": 4.873257727140979e-08, "loss": 0.1272, "step": 12145 }, { "epoch": 0.9695856948990181, "grad_norm": 0.27105255704931097, "learning_rate": 4.847797292503509e-08, "loss": 0.195, "step": 12146 }, { "epoch": 0.9696655224714616, "grad_norm": 0.2718465760602814, "learning_rate": 4.822403379852625e-08, "loss": 0.1279, "step": 12147 }, { "epoch": 0.9697453500439052, "grad_norm": 0.33969616832683586, "learning_rate": 4.7970759908857464e-08, "loss": 0.1857, "step": 12148 }, { "epoch": 0.9698251776163487, "grad_norm": 0.28076815775477243, "learning_rate": 4.7718151272958534e-08, "loss": 0.1413, "step": 12149 }, { "epoch": 0.9699050051887922, "grad_norm": 0.2782072301061438, "learning_rate": 4.746620790771594e-08, "loss": 0.2095, "step": 12150 }, { "epoch": 0.9699848327612357, "grad_norm": 0.33525333223007686, "learning_rate": 4.721492982997067e-08, "loss": 0.1565, "step": 12151 }, { "epoch": 0.9700646603336792, "grad_norm": 0.31664451814493355, "learning_rate": 4.696431705652038e-08, "loss": 0.1713, "step": 12152 }, { "epoch": 0.9701444879061227, "grad_norm": 0.3108521151276844, "learning_rate": 4.671436960411724e-08, "loss": 0.1795, "step": 12153 }, { "epoch": 0.9702243154785662, "grad_norm": 0.27999089498888075, "learning_rate": 4.6465087489468984e-08, "loss": 0.1427, "step": 12154 }, { "epoch": 0.9703041430510099, "grad_norm": 0.3029724438063523, "learning_rate": 4.621647072923896e-08, "loss": 0.1704, "step": 12155 }, { "epoch": 0.9703839706234534, "grad_norm": 0.29127312774016717, "learning_rate": 4.59685193400472e-08, "loss": 0.1778, "step": 12156 }, { "epoch": 0.9704637981958969, "grad_norm": 0.29447798917099444, "learning_rate": 4.572123333846712e-08, "loss": 0.1621, "step": 12157 }, { "epoch": 0.9705436257683404, "grad_norm": 0.28693397338785565, "learning_rate": 4.5474612741029934e-08, "loss": 0.1644, "step": 12158 }, { "epoch": 0.9706234533407839, "grad_norm": 0.2386351898493003, "learning_rate": 4.522865756421912e-08, "loss": 0.136, "step": 12159 }, { "epoch": 0.9707032809132274, "grad_norm": 0.2800176104736512, "learning_rate": 4.498336782447932e-08, "loss": 0.1686, "step": 12160 }, { "epoch": 0.970783108485671, "grad_norm": 0.2986552685633075, "learning_rate": 4.473874353820407e-08, "loss": 0.1199, "step": 12161 }, { "epoch": 0.9708629360581145, "grad_norm": 0.28554772713029086, "learning_rate": 4.4494784721746954e-08, "loss": 0.2034, "step": 12162 }, { "epoch": 0.970942763630558, "grad_norm": 0.29409696212923575, "learning_rate": 4.4251491391414935e-08, "loss": 0.158, "step": 12163 }, { "epoch": 0.9710225912030015, "grad_norm": 0.29303556488648225, "learning_rate": 4.400886356347167e-08, "loss": 0.164, "step": 12164 }, { "epoch": 0.971102418775445, "grad_norm": 0.2840601606153954, "learning_rate": 4.37669012541353e-08, "loss": 0.1712, "step": 12165 }, { "epoch": 0.9711822463478885, "grad_norm": 0.26221360200525967, "learning_rate": 4.3525604479581765e-08, "loss": 0.2049, "step": 12166 }, { "epoch": 0.971262073920332, "grad_norm": 0.2736979832847153, "learning_rate": 4.328497325593928e-08, "loss": 0.1546, "step": 12167 }, { "epoch": 0.9713419014927756, "grad_norm": 0.2222404490269308, "learning_rate": 4.3045007599292756e-08, "loss": 0.1572, "step": 12168 }, { "epoch": 0.9714217290652192, "grad_norm": 0.2893978127566078, "learning_rate": 4.28057075256838e-08, "loss": 0.188, "step": 12169 }, { "epoch": 0.9715015566376627, "grad_norm": 0.27838698577296866, "learning_rate": 4.256707305110963e-08, "loss": 0.1489, "step": 12170 }, { "epoch": 0.9715813842101062, "grad_norm": 0.31288679152941073, "learning_rate": 4.2329104191519696e-08, "loss": 0.16, "step": 12171 }, { "epoch": 0.9716612117825497, "grad_norm": 0.28539788059108046, "learning_rate": 4.209180096282239e-08, "loss": 0.1749, "step": 12172 }, { "epoch": 0.9717410393549932, "grad_norm": 0.29194179729611125, "learning_rate": 4.185516338088058e-08, "loss": 0.1562, "step": 12173 }, { "epoch": 0.9718208669274367, "grad_norm": 0.2444395719081768, "learning_rate": 4.161919146151383e-08, "loss": 0.1509, "step": 12174 }, { "epoch": 0.9719006944998803, "grad_norm": 0.2880637825148801, "learning_rate": 4.138388522049286e-08, "loss": 0.1398, "step": 12175 }, { "epoch": 0.9719805220723238, "grad_norm": 0.31231681679848927, "learning_rate": 4.114924467354953e-08, "loss": 0.1796, "step": 12176 }, { "epoch": 0.9720603496447673, "grad_norm": 0.23977966011695978, "learning_rate": 4.091526983636796e-08, "loss": 0.1502, "step": 12177 }, { "epoch": 0.9721401772172108, "grad_norm": 0.31426787567472236, "learning_rate": 4.0681960724587857e-08, "loss": 0.17, "step": 12178 }, { "epoch": 0.9722200047896543, "grad_norm": 0.27502521396747653, "learning_rate": 4.044931735380564e-08, "loss": 0.1915, "step": 12179 }, { "epoch": 0.9722998323620978, "grad_norm": 0.3435521950882514, "learning_rate": 4.02173397395722e-08, "loss": 0.1849, "step": 12180 }, { "epoch": 0.9723796599345413, "grad_norm": 0.24918261794382218, "learning_rate": 3.998602789739514e-08, "loss": 0.1673, "step": 12181 }, { "epoch": 0.972459487506985, "grad_norm": 0.26118928468148284, "learning_rate": 3.975538184273542e-08, "loss": 0.1174, "step": 12182 }, { "epoch": 0.9725393150794285, "grad_norm": 0.28133836634126014, "learning_rate": 3.9525401591011816e-08, "loss": 0.1833, "step": 12183 }, { "epoch": 0.972619142651872, "grad_norm": 0.27976304967157317, "learning_rate": 3.9296087157596476e-08, "loss": 0.1668, "step": 12184 }, { "epoch": 0.9726989702243155, "grad_norm": 0.2889118850627945, "learning_rate": 3.906743855782047e-08, "loss": 0.1611, "step": 12185 }, { "epoch": 0.972778797796759, "grad_norm": 0.27400008304511486, "learning_rate": 3.883945580696602e-08, "loss": 0.1324, "step": 12186 }, { "epoch": 0.9728586253692025, "grad_norm": 0.2915818957269741, "learning_rate": 3.861213892027316e-08, "loss": 0.1487, "step": 12187 }, { "epoch": 0.972938452941646, "grad_norm": 0.26170621044603387, "learning_rate": 3.83854879129375e-08, "loss": 0.1709, "step": 12188 }, { "epoch": 0.9730182805140896, "grad_norm": 0.29744366715010206, "learning_rate": 3.815950280011027e-08, "loss": 0.1539, "step": 12189 }, { "epoch": 0.9730981080865331, "grad_norm": 0.28974191609419553, "learning_rate": 3.793418359689605e-08, "loss": 0.1598, "step": 12190 }, { "epoch": 0.9731779356589766, "grad_norm": 0.28142189877641416, "learning_rate": 3.770953031835833e-08, "loss": 0.1357, "step": 12191 }, { "epoch": 0.9732577632314201, "grad_norm": 0.2760083116037369, "learning_rate": 3.74855429795129e-08, "loss": 0.1412, "step": 12192 }, { "epoch": 0.9733375908038636, "grad_norm": 0.2362077841394999, "learning_rate": 3.7262221595334434e-08, "loss": 0.1381, "step": 12193 }, { "epoch": 0.9734174183763071, "grad_norm": 0.2711528892991077, "learning_rate": 3.7039566180747666e-08, "loss": 0.0684, "step": 12194 }, { "epoch": 0.9734972459487506, "grad_norm": 0.2756513010867185, "learning_rate": 3.6817576750639575e-08, "loss": 0.1113, "step": 12195 }, { "epoch": 0.9735770735211943, "grad_norm": 0.29582752945334734, "learning_rate": 3.6596253319847175e-08, "loss": 0.1472, "step": 12196 }, { "epoch": 0.9736569010936378, "grad_norm": 0.2819396982560753, "learning_rate": 3.637559590316642e-08, "loss": 0.1224, "step": 12197 }, { "epoch": 0.9737367286660813, "grad_norm": 0.3315291279263139, "learning_rate": 3.615560451534661e-08, "loss": 0.1842, "step": 12198 }, { "epoch": 0.9738165562385248, "grad_norm": 0.27334442735870046, "learning_rate": 3.593627917109377e-08, "loss": 0.1345, "step": 12199 }, { "epoch": 0.9738963838109683, "grad_norm": 0.2879903931358731, "learning_rate": 3.571761988506839e-08, "loss": 0.157, "step": 12200 }, { "epoch": 0.9739762113834118, "grad_norm": 0.2991528389915336, "learning_rate": 3.5499626671887663e-08, "loss": 0.1512, "step": 12201 }, { "epoch": 0.9740560389558554, "grad_norm": 0.28039889517477584, "learning_rate": 3.528229954612328e-08, "loss": 0.1726, "step": 12202 }, { "epoch": 0.9741358665282989, "grad_norm": 0.29327830120378084, "learning_rate": 3.506563852230138e-08, "loss": 0.1696, "step": 12203 }, { "epoch": 0.9742156941007424, "grad_norm": 0.25084139062736793, "learning_rate": 3.4849643614907055e-08, "loss": 0.1392, "step": 12204 }, { "epoch": 0.9742955216731859, "grad_norm": 0.2942780567590904, "learning_rate": 3.4634314838377645e-08, "loss": 0.1579, "step": 12205 }, { "epoch": 0.9743753492456294, "grad_norm": 0.23265988596972603, "learning_rate": 3.4419652207107194e-08, "loss": 0.1693, "step": 12206 }, { "epoch": 0.9744551768180729, "grad_norm": 0.26239419200534053, "learning_rate": 3.420565573544421e-08, "loss": 0.145, "step": 12207 }, { "epoch": 0.9745350043905164, "grad_norm": 0.2609570276220204, "learning_rate": 3.399232543769504e-08, "loss": 0.1279, "step": 12208 }, { "epoch": 0.9746148319629601, "grad_norm": 0.3071223077796915, "learning_rate": 3.3779661328119384e-08, "loss": 0.189, "step": 12209 }, { "epoch": 0.9746946595354036, "grad_norm": 0.2880178766912483, "learning_rate": 3.356766342093254e-08, "loss": 0.1503, "step": 12210 }, { "epoch": 0.9747744871078471, "grad_norm": 0.2872334360859119, "learning_rate": 3.335633173030539e-08, "loss": 0.1949, "step": 12211 }, { "epoch": 0.9748543146802906, "grad_norm": 0.2721129631401996, "learning_rate": 3.314566627036553e-08, "loss": 0.1295, "step": 12212 }, { "epoch": 0.9749341422527341, "grad_norm": 0.3341817644864703, "learning_rate": 3.293566705519502e-08, "loss": 0.1719, "step": 12213 }, { "epoch": 0.9750139698251776, "grad_norm": 0.3642213160036012, "learning_rate": 3.2726334098831525e-08, "loss": 0.2019, "step": 12214 }, { "epoch": 0.9750937973976211, "grad_norm": 0.2852828412925782, "learning_rate": 3.2517667415268295e-08, "loss": 0.1423, "step": 12215 }, { "epoch": 0.9751736249700647, "grad_norm": 0.3406222184516732, "learning_rate": 3.230966701845306e-08, "loss": 0.2107, "step": 12216 }, { "epoch": 0.9752534525425082, "grad_norm": 0.32986484364377944, "learning_rate": 3.210233292228915e-08, "loss": 0.1733, "step": 12217 }, { "epoch": 0.9753332801149517, "grad_norm": 0.2723542785453338, "learning_rate": 3.1895665140638795e-08, "loss": 0.1762, "step": 12218 }, { "epoch": 0.9754131076873952, "grad_norm": 0.2678761264562817, "learning_rate": 3.16896636873143e-08, "loss": 0.1239, "step": 12219 }, { "epoch": 0.9754929352598387, "grad_norm": 0.2904586320335278, "learning_rate": 3.148432857608685e-08, "loss": 0.1335, "step": 12220 }, { "epoch": 0.9755727628322822, "grad_norm": 0.3311002056723937, "learning_rate": 3.127965982068215e-08, "loss": 0.1638, "step": 12221 }, { "epoch": 0.9756525904047257, "grad_norm": 0.28210447035460695, "learning_rate": 3.107565743478369e-08, "loss": 0.1643, "step": 12222 }, { "epoch": 0.9757324179771694, "grad_norm": 0.2537302611058766, "learning_rate": 3.087232143202501e-08, "loss": 0.1285, "step": 12223 }, { "epoch": 0.9758122455496129, "grad_norm": 0.30990760218388785, "learning_rate": 3.066965182599857e-08, "loss": 0.1601, "step": 12224 }, { "epoch": 0.9758920731220564, "grad_norm": 0.2882027744816077, "learning_rate": 3.046764863025464e-08, "loss": 0.2166, "step": 12225 }, { "epoch": 0.9759719006944999, "grad_norm": 0.34202349703561225, "learning_rate": 3.026631185829354e-08, "loss": 0.1643, "step": 12226 }, { "epoch": 0.9760517282669434, "grad_norm": 0.2989513877911597, "learning_rate": 3.00656415235756e-08, "loss": 0.1353, "step": 12227 }, { "epoch": 0.9761315558393869, "grad_norm": 0.30006919396462467, "learning_rate": 2.986563763951456e-08, "loss": 0.144, "step": 12228 }, { "epoch": 0.9762113834118304, "grad_norm": 0.2508890858562031, "learning_rate": 2.9666300219478584e-08, "loss": 0.1695, "step": 12229 }, { "epoch": 0.976291210984274, "grad_norm": 0.30227719162175504, "learning_rate": 2.9467629276794807e-08, "loss": 0.1755, "step": 12230 }, { "epoch": 0.9763710385567175, "grad_norm": 0.32767653476639436, "learning_rate": 2.9269624824741494e-08, "loss": 0.1988, "step": 12231 }, { "epoch": 0.976450866129161, "grad_norm": 0.2628359852080929, "learning_rate": 2.9072286876556945e-08, "loss": 0.1776, "step": 12232 }, { "epoch": 0.9765306937016045, "grad_norm": 0.290747326631367, "learning_rate": 2.8875615445428385e-08, "loss": 0.1475, "step": 12233 }, { "epoch": 0.976610521274048, "grad_norm": 0.29093957569263323, "learning_rate": 2.867961054450641e-08, "loss": 0.1764, "step": 12234 }, { "epoch": 0.9766903488464915, "grad_norm": 0.2822330077955435, "learning_rate": 2.8484272186892757e-08, "loss": 0.1657, "step": 12235 }, { "epoch": 0.9767701764189352, "grad_norm": 0.25473102684951854, "learning_rate": 2.8289600385642546e-08, "loss": 0.1048, "step": 12236 }, { "epoch": 0.9768500039913787, "grad_norm": 0.3562454638495398, "learning_rate": 2.8095595153770916e-08, "loss": 0.1553, "step": 12237 }, { "epoch": 0.9769298315638222, "grad_norm": 0.3252974730128081, "learning_rate": 2.7902256504245274e-08, "loss": 0.165, "step": 12238 }, { "epoch": 0.9770096591362657, "grad_norm": 0.27826333835469896, "learning_rate": 2.7709584449989725e-08, "loss": 0.1706, "step": 12239 }, { "epoch": 0.9770894867087092, "grad_norm": 0.310752893009407, "learning_rate": 2.7517579003883964e-08, "loss": 0.1526, "step": 12240 }, { "epoch": 0.9771693142811527, "grad_norm": 0.27110069618876553, "learning_rate": 2.7326240178763286e-08, "loss": 0.1657, "step": 12241 }, { "epoch": 0.9772491418535962, "grad_norm": 0.2910887373750897, "learning_rate": 2.7135567987417455e-08, "loss": 0.1538, "step": 12242 }, { "epoch": 0.9773289694260398, "grad_norm": 0.26689785103785263, "learning_rate": 2.6945562442591832e-08, "loss": 0.1703, "step": 12243 }, { "epoch": 0.9774087969984833, "grad_norm": 0.26928117631421994, "learning_rate": 2.675622355698737e-08, "loss": 0.1891, "step": 12244 }, { "epoch": 0.9774886245709268, "grad_norm": 0.24578313315508726, "learning_rate": 2.6567551343261722e-08, "loss": 0.1461, "step": 12245 }, { "epoch": 0.9775684521433703, "grad_norm": 0.2886207868751199, "learning_rate": 2.637954581402591e-08, "loss": 0.1078, "step": 12246 }, { "epoch": 0.9776482797158138, "grad_norm": 0.2952081117440229, "learning_rate": 2.6192206981847657e-08, "loss": 0.2004, "step": 12247 }, { "epoch": 0.9777281072882573, "grad_norm": 0.2994649148248199, "learning_rate": 2.600553485924917e-08, "loss": 0.1879, "step": 12248 }, { "epoch": 0.9778079348607008, "grad_norm": 0.315721845046253, "learning_rate": 2.581952945871047e-08, "loss": 0.2331, "step": 12249 }, { "epoch": 0.9778877624331445, "grad_norm": 0.27498697897690916, "learning_rate": 2.5634190792662717e-08, "loss": 0.1522, "step": 12250 }, { "epoch": 0.977967590005588, "grad_norm": 0.26848663857539556, "learning_rate": 2.5449518873498225e-08, "loss": 0.1353, "step": 12251 }, { "epoch": 0.9780474175780315, "grad_norm": 0.2832573425914589, "learning_rate": 2.5265513713558233e-08, "loss": 0.1436, "step": 12252 }, { "epoch": 0.978127245150475, "grad_norm": 0.26174352416958874, "learning_rate": 2.5082175325146228e-08, "loss": 0.1286, "step": 12253 }, { "epoch": 0.9782070727229185, "grad_norm": 0.29651558655568794, "learning_rate": 2.489950372051464e-08, "loss": 0.2225, "step": 12254 }, { "epoch": 0.978286900295362, "grad_norm": 0.25098416129541823, "learning_rate": 2.4717498911875916e-08, "loss": 0.1998, "step": 12255 }, { "epoch": 0.9783667278678055, "grad_norm": 0.2891715227608924, "learning_rate": 2.453616091139477e-08, "loss": 0.1835, "step": 12256 }, { "epoch": 0.978446555440249, "grad_norm": 0.26393553077693793, "learning_rate": 2.4355489731195946e-08, "loss": 0.1709, "step": 12257 }, { "epoch": 0.9785263830126926, "grad_norm": 0.299644798756351, "learning_rate": 2.4175485383354237e-08, "loss": 0.1711, "step": 12258 }, { "epoch": 0.9786062105851361, "grad_norm": 0.2685078822545008, "learning_rate": 2.3996147879902232e-08, "loss": 0.1523, "step": 12259 }, { "epoch": 0.9786860381575796, "grad_norm": 0.2821012208730478, "learning_rate": 2.3817477232829234e-08, "loss": 0.1596, "step": 12260 }, { "epoch": 0.9787658657300231, "grad_norm": 0.2600417799380999, "learning_rate": 2.36394734540768e-08, "loss": 0.1651, "step": 12261 }, { "epoch": 0.9788456933024666, "grad_norm": 0.2430240358946717, "learning_rate": 2.3462136555546522e-08, "loss": 0.1394, "step": 12262 }, { "epoch": 0.9789255208749102, "grad_norm": 0.27777168113397244, "learning_rate": 2.328546654909003e-08, "loss": 0.1571, "step": 12263 }, { "epoch": 0.9790053484473538, "grad_norm": 0.3235770790148385, "learning_rate": 2.3109463446517876e-08, "loss": 0.1974, "step": 12264 }, { "epoch": 0.9790851760197973, "grad_norm": 0.2841087977920959, "learning_rate": 2.2934127259595097e-08, "loss": 0.1317, "step": 12265 }, { "epoch": 0.9791650035922408, "grad_norm": 0.3108813281372791, "learning_rate": 2.2759458000043423e-08, "loss": 0.1602, "step": 12266 }, { "epoch": 0.9792448311646843, "grad_norm": 0.29535281841747185, "learning_rate": 2.258545567953796e-08, "loss": 0.1789, "step": 12267 }, { "epoch": 0.9793246587371278, "grad_norm": 0.3256619056947502, "learning_rate": 2.2412120309708297e-08, "loss": 0.1835, "step": 12268 }, { "epoch": 0.9794044863095713, "grad_norm": 0.2609622754225781, "learning_rate": 2.223945190214405e-08, "loss": 0.1217, "step": 12269 }, { "epoch": 0.9794843138820148, "grad_norm": 0.309878057692891, "learning_rate": 2.2067450468385986e-08, "loss": 0.1461, "step": 12270 }, { "epoch": 0.9795641414544584, "grad_norm": 0.25553859030328574, "learning_rate": 2.189611601993269e-08, "loss": 0.1358, "step": 12271 }, { "epoch": 0.9796439690269019, "grad_norm": 0.3044075106125161, "learning_rate": 2.1725448568236107e-08, "loss": 0.1427, "step": 12272 }, { "epoch": 0.9797237965993454, "grad_norm": 0.28651699798791563, "learning_rate": 2.155544812470378e-08, "loss": 0.1607, "step": 12273 }, { "epoch": 0.9798036241717889, "grad_norm": 0.3039226522359868, "learning_rate": 2.1386114700702176e-08, "loss": 0.1032, "step": 12274 }, { "epoch": 0.9798834517442324, "grad_norm": 0.2731280797527507, "learning_rate": 2.1217448307548906e-08, "loss": 0.2045, "step": 12275 }, { "epoch": 0.9799632793166759, "grad_norm": 0.3082901332021746, "learning_rate": 2.1049448956519393e-08, "loss": 0.1476, "step": 12276 }, { "epoch": 0.9800431068891196, "grad_norm": 0.2400699334061701, "learning_rate": 2.088211665884243e-08, "loss": 0.1537, "step": 12277 }, { "epoch": 0.9801229344615631, "grad_norm": 0.346531285632242, "learning_rate": 2.0715451425704636e-08, "loss": 0.1643, "step": 12278 }, { "epoch": 0.9802027620340066, "grad_norm": 0.3235388701610704, "learning_rate": 2.054945326824709e-08, "loss": 0.1548, "step": 12279 }, { "epoch": 0.9802825896064501, "grad_norm": 0.2706147544974751, "learning_rate": 2.0384122197565358e-08, "loss": 0.1382, "step": 12280 }, { "epoch": 0.9803624171788936, "grad_norm": 0.2864707778806079, "learning_rate": 2.0219458224711717e-08, "loss": 0.1626, "step": 12281 }, { "epoch": 0.9804422447513371, "grad_norm": 0.27849855271420426, "learning_rate": 2.0055461360692918e-08, "loss": 0.1911, "step": 12282 }, { "epoch": 0.9805220723237806, "grad_norm": 0.25251222530373746, "learning_rate": 1.98921316164713e-08, "loss": 0.1682, "step": 12283 }, { "epoch": 0.9806018998962241, "grad_norm": 0.2810300175291314, "learning_rate": 1.9729469002965907e-08, "loss": 0.2048, "step": 12284 }, { "epoch": 0.9806817274686677, "grad_norm": 0.27480776377602445, "learning_rate": 1.9567473531049154e-08, "loss": 0.1613, "step": 12285 }, { "epoch": 0.9807615550411112, "grad_norm": 0.2754015089467288, "learning_rate": 1.9406145211549045e-08, "loss": 0.1446, "step": 12286 }, { "epoch": 0.9808413826135547, "grad_norm": 0.31815092430044833, "learning_rate": 1.9245484055252505e-08, "loss": 0.2054, "step": 12287 }, { "epoch": 0.9809212101859982, "grad_norm": 0.3017405949865402, "learning_rate": 1.9085490072895397e-08, "loss": 0.1906, "step": 12288 }, { "epoch": 0.9810010377584417, "grad_norm": 0.3027748265078291, "learning_rate": 1.8926163275175824e-08, "loss": 0.1362, "step": 12289 }, { "epoch": 0.9810808653308852, "grad_norm": 0.25190857523137317, "learning_rate": 1.8767503672743048e-08, "loss": 0.1316, "step": 12290 }, { "epoch": 0.9811606929033289, "grad_norm": 0.2522892786979267, "learning_rate": 1.860951127620192e-08, "loss": 0.1337, "step": 12291 }, { "epoch": 0.9812405204757724, "grad_norm": 0.2978027936397589, "learning_rate": 1.84521860961151e-08, "loss": 0.1418, "step": 12292 }, { "epoch": 0.9813203480482159, "grad_norm": 0.28194081427821277, "learning_rate": 1.829552814299751e-08, "loss": 0.1703, "step": 12293 }, { "epoch": 0.9814001756206594, "grad_norm": 0.30382194405529594, "learning_rate": 1.8139537427322994e-08, "loss": 0.1656, "step": 12294 }, { "epoch": 0.9814800031931029, "grad_norm": 0.2766161645369933, "learning_rate": 1.798421395951766e-08, "loss": 0.1795, "step": 12295 }, { "epoch": 0.9815598307655464, "grad_norm": 0.25467984441409464, "learning_rate": 1.782955774996431e-08, "loss": 0.1303, "step": 12296 }, { "epoch": 0.9816396583379899, "grad_norm": 0.3080146774203531, "learning_rate": 1.7675568809001343e-08, "loss": 0.1647, "step": 12297 }, { "epoch": 0.9817194859104335, "grad_norm": 0.24491965231541257, "learning_rate": 1.7522247146922744e-08, "loss": 0.1279, "step": 12298 }, { "epoch": 0.981799313482877, "grad_norm": 0.28600198184157033, "learning_rate": 1.7369592773976986e-08, "loss": 0.1767, "step": 12299 }, { "epoch": 0.9818791410553205, "grad_norm": 0.2628173816919035, "learning_rate": 1.7217605700368123e-08, "loss": 0.1884, "step": 12300 }, { "epoch": 0.981958968627764, "grad_norm": 0.28095656544793557, "learning_rate": 1.706628593625581e-08, "loss": 0.1399, "step": 12301 }, { "epoch": 0.9820387962002075, "grad_norm": 0.36341124107191725, "learning_rate": 1.69156334917564e-08, "loss": 0.1883, "step": 12302 }, { "epoch": 0.982118623772651, "grad_norm": 0.2479860201074294, "learning_rate": 1.6765648376939614e-08, "loss": 0.1435, "step": 12303 }, { "epoch": 0.9821984513450946, "grad_norm": 0.24045398878466484, "learning_rate": 1.6616330601830767e-08, "loss": 0.1423, "step": 12304 }, { "epoch": 0.9822782789175382, "grad_norm": 0.2528129433482632, "learning_rate": 1.646768017641298e-08, "loss": 0.1653, "step": 12305 }, { "epoch": 0.9823581064899817, "grad_norm": 0.24689689828466127, "learning_rate": 1.6319697110620537e-08, "loss": 0.1188, "step": 12306 }, { "epoch": 0.9824379340624252, "grad_norm": 0.2879606915497095, "learning_rate": 1.6172381414346627e-08, "loss": 0.1619, "step": 12307 }, { "epoch": 0.9825177616348687, "grad_norm": 0.29027320379575505, "learning_rate": 1.6025733097440045e-08, "loss": 0.1469, "step": 12308 }, { "epoch": 0.9825975892073122, "grad_norm": 0.28929049904213927, "learning_rate": 1.5879752169700723e-08, "loss": 0.1534, "step": 12309 }, { "epoch": 0.9826774167797557, "grad_norm": 0.3053410068173111, "learning_rate": 1.573443864088975e-08, "loss": 0.1474, "step": 12310 }, { "epoch": 0.9827572443521992, "grad_norm": 0.27889437873385003, "learning_rate": 1.558979252071935e-08, "loss": 0.166, "step": 12311 }, { "epoch": 0.9828370719246428, "grad_norm": 0.2666310828466813, "learning_rate": 1.5445813818858456e-08, "loss": 0.1535, "step": 12312 }, { "epoch": 0.9829168994970863, "grad_norm": 0.32163853499922446, "learning_rate": 1.5302502544932706e-08, "loss": 0.1415, "step": 12313 }, { "epoch": 0.9829967270695298, "grad_norm": 0.26101748535392144, "learning_rate": 1.515985870851999e-08, "loss": 0.138, "step": 12314 }, { "epoch": 0.9830765546419733, "grad_norm": 0.2594110337147134, "learning_rate": 1.501788231915713e-08, "loss": 0.1793, "step": 12315 }, { "epoch": 0.9831563822144168, "grad_norm": 0.31406933551416877, "learning_rate": 1.4876573386333192e-08, "loss": 0.1426, "step": 12316 }, { "epoch": 0.9832362097868603, "grad_norm": 0.30450124066012363, "learning_rate": 1.4735931919496182e-08, "loss": 0.1743, "step": 12317 }, { "epoch": 0.983316037359304, "grad_norm": 0.2787025676097189, "learning_rate": 1.4595957928045246e-08, "loss": 0.1363, "step": 12318 }, { "epoch": 0.9833958649317475, "grad_norm": 0.2638590550409465, "learning_rate": 1.4456651421338452e-08, "loss": 0.2241, "step": 12319 }, { "epoch": 0.983475692504191, "grad_norm": 0.2729087025215097, "learning_rate": 1.4318012408687243e-08, "loss": 0.1434, "step": 12320 }, { "epoch": 0.9835555200766345, "grad_norm": 0.27811234999364337, "learning_rate": 1.4180040899359759e-08, "loss": 0.1807, "step": 12321 }, { "epoch": 0.983635347649078, "grad_norm": 0.2689936665757621, "learning_rate": 1.4042736902578624e-08, "loss": 0.1753, "step": 12322 }, { "epoch": 0.9837151752215215, "grad_norm": 0.3229092485668049, "learning_rate": 1.390610042752094e-08, "loss": 0.1434, "step": 12323 }, { "epoch": 0.983795002793965, "grad_norm": 0.26891822325313713, "learning_rate": 1.3770131483322734e-08, "loss": 0.1563, "step": 12324 }, { "epoch": 0.9838748303664085, "grad_norm": 0.25524045637129134, "learning_rate": 1.3634830079070072e-08, "loss": 0.1212, "step": 12325 }, { "epoch": 0.9839546579388521, "grad_norm": 0.3088022959881672, "learning_rate": 1.3500196223809047e-08, "loss": 0.1644, "step": 12326 }, { "epoch": 0.9840344855112956, "grad_norm": 0.27546013227857047, "learning_rate": 1.3366229926539131e-08, "loss": 0.1573, "step": 12327 }, { "epoch": 0.9841143130837391, "grad_norm": 0.2489838700968757, "learning_rate": 1.3232931196216492e-08, "loss": 0.1542, "step": 12328 }, { "epoch": 0.9841941406561826, "grad_norm": 0.2971616258397372, "learning_rate": 1.3100300041749559e-08, "loss": 0.1945, "step": 12329 }, { "epoch": 0.9842739682286261, "grad_norm": 0.29846430762046633, "learning_rate": 1.2968336472004572e-08, "loss": 0.1888, "step": 12330 }, { "epoch": 0.9843537958010697, "grad_norm": 0.250450177212095, "learning_rate": 1.2837040495804476e-08, "loss": 0.1883, "step": 12331 }, { "epoch": 0.9844336233735133, "grad_norm": 0.2655993869826336, "learning_rate": 1.2706412121924472e-08, "loss": 0.1372, "step": 12332 }, { "epoch": 0.9845134509459568, "grad_norm": 0.29682341588920574, "learning_rate": 1.2576451359096464e-08, "loss": 0.155, "step": 12333 }, { "epoch": 0.9845932785184003, "grad_norm": 0.3011305242292502, "learning_rate": 1.2447158216009058e-08, "loss": 0.1722, "step": 12334 }, { "epoch": 0.9846731060908438, "grad_norm": 0.301824384784619, "learning_rate": 1.2318532701303121e-08, "loss": 0.1391, "step": 12335 }, { "epoch": 0.9847529336632873, "grad_norm": 0.31506271258048585, "learning_rate": 1.2190574823578437e-08, "loss": 0.1545, "step": 12336 }, { "epoch": 0.9848327612357308, "grad_norm": 0.2789980368108216, "learning_rate": 1.206328459138817e-08, "loss": 0.1912, "step": 12337 }, { "epoch": 0.9849125888081743, "grad_norm": 0.27329183011268937, "learning_rate": 1.1936662013241063e-08, "loss": 0.1501, "step": 12338 }, { "epoch": 0.9849924163806179, "grad_norm": 0.2824776019211602, "learning_rate": 1.1810707097600349e-08, "loss": 0.1449, "step": 12339 }, { "epoch": 0.9850722439530614, "grad_norm": 0.2851364379579965, "learning_rate": 1.168541985288596e-08, "loss": 0.1281, "step": 12340 }, { "epoch": 0.9851520715255049, "grad_norm": 0.247959538284328, "learning_rate": 1.1560800287474528e-08, "loss": 0.1287, "step": 12341 }, { "epoch": 0.9852318990979484, "grad_norm": 0.245823494971918, "learning_rate": 1.1436848409693835e-08, "loss": 0.1357, "step": 12342 }, { "epoch": 0.9853117266703919, "grad_norm": 0.25600744725431906, "learning_rate": 1.1313564227831697e-08, "loss": 0.1492, "step": 12343 }, { "epoch": 0.9853915542428354, "grad_norm": 0.27984519623349224, "learning_rate": 1.119094775012819e-08, "loss": 0.1892, "step": 12344 }, { "epoch": 0.985471381815279, "grad_norm": 0.2817154721242504, "learning_rate": 1.1068998984780088e-08, "loss": 0.1298, "step": 12345 }, { "epoch": 0.9855512093877226, "grad_norm": 0.28341791419226287, "learning_rate": 1.0947717939938652e-08, "loss": 0.159, "step": 12346 }, { "epoch": 0.9856310369601661, "grad_norm": 0.2811210178762669, "learning_rate": 1.0827104623711837e-08, "loss": 0.1529, "step": 12347 }, { "epoch": 0.9857108645326096, "grad_norm": 0.29833419106880166, "learning_rate": 1.0707159044160975e-08, "loss": 0.1557, "step": 12348 }, { "epoch": 0.9857906921050531, "grad_norm": 0.32086592002420994, "learning_rate": 1.0587881209305207e-08, "loss": 0.135, "step": 12349 }, { "epoch": 0.9858705196774966, "grad_norm": 0.25488928893827234, "learning_rate": 1.0469271127117042e-08, "loss": 0.1442, "step": 12350 }, { "epoch": 0.9859503472499401, "grad_norm": 0.311574998343606, "learning_rate": 1.0351328805525696e-08, "loss": 0.1466, "step": 12351 }, { "epoch": 0.9860301748223836, "grad_norm": 0.24502147817567274, "learning_rate": 1.023405425241375e-08, "loss": 0.1424, "step": 12352 }, { "epoch": 0.9861100023948272, "grad_norm": 0.29099052094117245, "learning_rate": 1.011744747562271e-08, "loss": 0.1362, "step": 12353 }, { "epoch": 0.9861898299672707, "grad_norm": 0.3101305874269764, "learning_rate": 1.0001508482945233e-08, "loss": 0.1463, "step": 12354 }, { "epoch": 0.9862696575397142, "grad_norm": 0.31990129943054124, "learning_rate": 9.886237282132893e-09, "loss": 0.1908, "step": 12355 }, { "epoch": 0.9863494851121577, "grad_norm": 0.2484255130125854, "learning_rate": 9.771633880890641e-09, "loss": 0.1621, "step": 12356 }, { "epoch": 0.9864293126846012, "grad_norm": 0.2768107267770299, "learning_rate": 9.657698286877904e-09, "loss": 0.1377, "step": 12357 }, { "epoch": 0.9865091402570448, "grad_norm": 0.313177197933205, "learning_rate": 9.544430507713032e-09, "loss": 0.1874, "step": 12358 }, { "epoch": 0.9865889678294884, "grad_norm": 0.28573026283899844, "learning_rate": 9.431830550966636e-09, "loss": 0.2162, "step": 12359 }, { "epoch": 0.9866687954019319, "grad_norm": 0.2484990668976716, "learning_rate": 9.31989842416492e-09, "loss": 0.1265, "step": 12360 }, { "epoch": 0.9867486229743754, "grad_norm": 0.24827694807035838, "learning_rate": 9.208634134790784e-09, "loss": 0.1867, "step": 12361 }, { "epoch": 0.9868284505468189, "grad_norm": 0.2616274457688774, "learning_rate": 9.098037690282724e-09, "loss": 0.1151, "step": 12362 }, { "epoch": 0.9869082781192624, "grad_norm": 0.2896957930824914, "learning_rate": 8.988109098031495e-09, "loss": 0.1527, "step": 12363 }, { "epoch": 0.9869881056917059, "grad_norm": 0.264079549385085, "learning_rate": 8.878848365386772e-09, "loss": 0.1396, "step": 12364 }, { "epoch": 0.9870679332641494, "grad_norm": 0.26095558953360837, "learning_rate": 8.770255499651604e-09, "loss": 0.1369, "step": 12365 }, { "epoch": 0.987147760836593, "grad_norm": 0.3491469430966254, "learning_rate": 8.662330508085736e-09, "loss": 0.1183, "step": 12366 }, { "epoch": 0.9872275884090365, "grad_norm": 0.2992614665598018, "learning_rate": 8.555073397903401e-09, "loss": 0.129, "step": 12367 }, { "epoch": 0.98730741598148, "grad_norm": 0.292003762184355, "learning_rate": 8.448484176273308e-09, "loss": 0.1726, "step": 12368 }, { "epoch": 0.9873872435539235, "grad_norm": 0.25302341963296326, "learning_rate": 8.342562850321978e-09, "loss": 0.159, "step": 12369 }, { "epoch": 0.987467071126367, "grad_norm": 0.3017015177426465, "learning_rate": 8.237309427129303e-09, "loss": 0.1541, "step": 12370 }, { "epoch": 0.9875468986988105, "grad_norm": 0.2595544229989996, "learning_rate": 8.132723913729657e-09, "loss": 0.1402, "step": 12371 }, { "epoch": 0.9876267262712541, "grad_norm": 0.2858913316608372, "learning_rate": 8.028806317116334e-09, "loss": 0.1442, "step": 12372 }, { "epoch": 0.9877065538436977, "grad_norm": 0.2771625679352937, "learning_rate": 7.925556644234888e-09, "loss": 0.1702, "step": 12373 }, { "epoch": 0.9877863814161412, "grad_norm": 0.2903855585604611, "learning_rate": 7.82297490198647e-09, "loss": 0.1443, "step": 12374 }, { "epoch": 0.9878662089885847, "grad_norm": 0.2839607267997433, "learning_rate": 7.721061097228922e-09, "loss": 0.1705, "step": 12375 }, { "epoch": 0.9879460365610282, "grad_norm": 0.3953675812540948, "learning_rate": 7.619815236775684e-09, "loss": 0.2303, "step": 12376 }, { "epoch": 0.9880258641334717, "grad_norm": 0.2745399392559541, "learning_rate": 7.519237327393569e-09, "loss": 0.1695, "step": 12377 }, { "epoch": 0.9881056917059152, "grad_norm": 0.2710779936507726, "learning_rate": 7.419327375804975e-09, "loss": 0.164, "step": 12378 }, { "epoch": 0.9881855192783587, "grad_norm": 0.2535710526634715, "learning_rate": 7.320085388690113e-09, "loss": 0.1925, "step": 12379 }, { "epoch": 0.9882653468508023, "grad_norm": 0.30228572343877724, "learning_rate": 7.221511372682566e-09, "loss": 0.13, "step": 12380 }, { "epoch": 0.9883451744232458, "grad_norm": 0.2685297996116934, "learning_rate": 7.123605334371508e-09, "loss": 0.2029, "step": 12381 }, { "epoch": 0.9884250019956893, "grad_norm": 0.32306214261572525, "learning_rate": 7.0263672803005944e-09, "loss": 0.1212, "step": 12382 }, { "epoch": 0.9885048295681328, "grad_norm": 0.2829577194491632, "learning_rate": 6.92979721697129e-09, "loss": 0.1228, "step": 12383 }, { "epoch": 0.9885846571405763, "grad_norm": 0.2766095766379426, "learning_rate": 6.833895150837322e-09, "loss": 0.1607, "step": 12384 }, { "epoch": 0.9886644847130199, "grad_norm": 0.2963209623444092, "learning_rate": 6.738661088311338e-09, "loss": 0.1436, "step": 12385 }, { "epoch": 0.9887443122854634, "grad_norm": 0.27724372668231423, "learning_rate": 6.644095035757136e-09, "loss": 0.1734, "step": 12386 }, { "epoch": 0.988824139857907, "grad_norm": 0.29352397007214887, "learning_rate": 6.550196999498548e-09, "loss": 0.1859, "step": 12387 }, { "epoch": 0.9889039674303505, "grad_norm": 0.2825282862633156, "learning_rate": 6.4569669858105535e-09, "loss": 0.1479, "step": 12388 }, { "epoch": 0.988983795002794, "grad_norm": 0.32055065978513986, "learning_rate": 6.3644050009248334e-09, "loss": 0.1818, "step": 12389 }, { "epoch": 0.9890636225752375, "grad_norm": 0.2669268228514938, "learning_rate": 6.272511051030883e-09, "loss": 0.1621, "step": 12390 }, { "epoch": 0.989143450147681, "grad_norm": 0.31450963783373803, "learning_rate": 6.1812851422693445e-09, "loss": 0.1852, "step": 12391 }, { "epoch": 0.9892232777201245, "grad_norm": 0.27010355906603983, "learning_rate": 6.090727280739783e-09, "loss": 0.1131, "step": 12392 }, { "epoch": 0.989303105292568, "grad_norm": 0.2755526448895422, "learning_rate": 6.000837472494026e-09, "loss": 0.1636, "step": 12393 }, { "epoch": 0.9893829328650116, "grad_norm": 0.26318299555325225, "learning_rate": 5.9116157235428186e-09, "loss": 0.128, "step": 12394 }, { "epoch": 0.9894627604374551, "grad_norm": 0.2813189574912312, "learning_rate": 5.82306203984806e-09, "loss": 0.1735, "step": 12395 }, { "epoch": 0.9895425880098986, "grad_norm": 0.28186692861958196, "learning_rate": 5.735176427331679e-09, "loss": 0.1564, "step": 12396 }, { "epoch": 0.9896224155823421, "grad_norm": 0.2815426626187508, "learning_rate": 5.647958891866756e-09, "loss": 0.1667, "step": 12397 }, { "epoch": 0.9897022431547856, "grad_norm": 0.275205003782233, "learning_rate": 5.561409439284182e-09, "loss": 0.1826, "step": 12398 }, { "epoch": 0.9897820707272292, "grad_norm": 0.3047553169508559, "learning_rate": 5.47552807536822e-09, "loss": 0.172, "step": 12399 }, { "epoch": 0.9898618982996727, "grad_norm": 0.29565538010893333, "learning_rate": 5.390314805860941e-09, "loss": 0.1665, "step": 12400 }, { "epoch": 0.9899417258721163, "grad_norm": 0.27615447545394767, "learning_rate": 5.305769636458902e-09, "loss": 0.1543, "step": 12401 }, { "epoch": 0.9900215534445598, "grad_norm": 0.32085032914834194, "learning_rate": 5.221892572813137e-09, "loss": 0.166, "step": 12402 }, { "epoch": 0.9901013810170033, "grad_norm": 0.29415667217881086, "learning_rate": 5.138683620529161e-09, "loss": 0.1669, "step": 12403 }, { "epoch": 0.9901812085894468, "grad_norm": 0.2673418052905802, "learning_rate": 5.056142785171414e-09, "loss": 0.151, "step": 12404 }, { "epoch": 0.9902610361618903, "grad_norm": 0.25648266334136843, "learning_rate": 4.974270072255483e-09, "loss": 0.1291, "step": 12405 }, { "epoch": 0.9903408637343338, "grad_norm": 0.25574919529349915, "learning_rate": 4.893065487255877e-09, "loss": 0.1447, "step": 12406 }, { "epoch": 0.9904206913067773, "grad_norm": 0.270932871835374, "learning_rate": 4.812529035599367e-09, "loss": 0.1276, "step": 12407 }, { "epoch": 0.9905005188792209, "grad_norm": 0.2699039270082861, "learning_rate": 4.732660722669424e-09, "loss": 0.1252, "step": 12408 }, { "epoch": 0.9905803464516644, "grad_norm": 0.2670430121506101, "learning_rate": 4.653460553807332e-09, "loss": 0.1398, "step": 12409 }, { "epoch": 0.9906601740241079, "grad_norm": 0.30826399740565097, "learning_rate": 4.5749285343044125e-09, "loss": 0.1441, "step": 12410 }, { "epoch": 0.9907400015965514, "grad_norm": 0.2715620290443714, "learning_rate": 4.497064669410911e-09, "loss": 0.1938, "step": 12411 }, { "epoch": 0.990819829168995, "grad_norm": 0.2923464142084344, "learning_rate": 4.419868964333773e-09, "loss": 0.1253, "step": 12412 }, { "epoch": 0.9908996567414385, "grad_norm": 0.3025786948494094, "learning_rate": 4.3433414242299855e-09, "loss": 0.1455, "step": 12413 }, { "epoch": 0.990979484313882, "grad_norm": 0.22313612849310543, "learning_rate": 4.267482054217676e-09, "loss": 0.1078, "step": 12414 }, { "epoch": 0.9910593118863256, "grad_norm": 0.300196012751794, "learning_rate": 4.192290859367232e-09, "loss": 0.1262, "step": 12415 }, { "epoch": 0.9911391394587691, "grad_norm": 0.38817125455247253, "learning_rate": 4.117767844704634e-09, "loss": 0.1372, "step": 12416 }, { "epoch": 0.9912189670312126, "grad_norm": 0.2702085795270814, "learning_rate": 4.043913015210344e-09, "loss": 0.15, "step": 12417 }, { "epoch": 0.9912987946036561, "grad_norm": 0.3086624578650137, "learning_rate": 3.970726375822631e-09, "loss": 0.1495, "step": 12418 }, { "epoch": 0.9913786221760996, "grad_norm": 0.24954086378707788, "learning_rate": 3.89820793143314e-09, "loss": 0.1457, "step": 12419 }, { "epoch": 0.9914584497485431, "grad_norm": 0.3227556681154078, "learning_rate": 3.826357686890214e-09, "loss": 0.193, "step": 12420 }, { "epoch": 0.9915382773209866, "grad_norm": 0.24994374486150672, "learning_rate": 3.7551756469966785e-09, "loss": 0.1597, "step": 12421 }, { "epoch": 0.9916181048934302, "grad_norm": 0.2748354035718176, "learning_rate": 3.6846618165087277e-09, "loss": 0.2035, "step": 12422 }, { "epoch": 0.9916979324658737, "grad_norm": 0.28353605720159436, "learning_rate": 3.61481620014259e-09, "loss": 0.1447, "step": 12423 }, { "epoch": 0.9917777600383172, "grad_norm": 0.3264793159170269, "learning_rate": 3.545638802565643e-09, "loss": 0.1679, "step": 12424 }, { "epoch": 0.9918575876107607, "grad_norm": 0.32264289414897446, "learning_rate": 3.4771296284030755e-09, "loss": 0.1784, "step": 12425 }, { "epoch": 0.9919374151832043, "grad_norm": 0.2534186216920894, "learning_rate": 3.409288682233447e-09, "loss": 0.1476, "step": 12426 }, { "epoch": 0.9920172427556478, "grad_norm": 0.2600366206809738, "learning_rate": 3.342115968592019e-09, "loss": 0.1549, "step": 12427 }, { "epoch": 0.9920970703280914, "grad_norm": 0.2794951111506899, "learning_rate": 3.2756114919696434e-09, "loss": 0.198, "step": 12428 }, { "epoch": 0.9921768979005349, "grad_norm": 0.25445991679074575, "learning_rate": 3.2097752568105434e-09, "loss": 0.1875, "step": 12429 }, { "epoch": 0.9922567254729784, "grad_norm": 0.26001029605165693, "learning_rate": 3.144607267517863e-09, "loss": 0.1579, "step": 12430 }, { "epoch": 0.9923365530454219, "grad_norm": 0.2809548554016177, "learning_rate": 3.0801075284447867e-09, "loss": 0.1809, "step": 12431 }, { "epoch": 0.9924163806178654, "grad_norm": 0.2635005765384522, "learning_rate": 3.0162760439056416e-09, "loss": 0.1926, "step": 12432 }, { "epoch": 0.9924962081903089, "grad_norm": 0.24760852264078734, "learning_rate": 2.953112818165904e-09, "loss": 0.1887, "step": 12433 }, { "epoch": 0.9925760357627524, "grad_norm": 0.25675071088753176, "learning_rate": 2.8906178554477526e-09, "loss": 0.1667, "step": 12434 }, { "epoch": 0.992655863335196, "grad_norm": 0.2391216092759457, "learning_rate": 2.8287911599289563e-09, "loss": 0.1052, "step": 12435 }, { "epoch": 0.9927356909076395, "grad_norm": 0.29640170615478967, "learning_rate": 2.7676327357428758e-09, "loss": 0.1036, "step": 12436 }, { "epoch": 0.992815518480083, "grad_norm": 0.29946314928526935, "learning_rate": 2.7071425869773517e-09, "loss": 0.1656, "step": 12437 }, { "epoch": 0.9928953460525265, "grad_norm": 0.3001106828497219, "learning_rate": 2.6473207176758163e-09, "loss": 0.1237, "step": 12438 }, { "epoch": 0.9929751736249701, "grad_norm": 0.28154558240546673, "learning_rate": 2.5881671318361834e-09, "loss": 0.1916, "step": 12439 }, { "epoch": 0.9930550011974136, "grad_norm": 0.29532564348757245, "learning_rate": 2.529681833414177e-09, "loss": 0.1479, "step": 12440 }, { "epoch": 0.9931348287698571, "grad_norm": 0.2678547461110312, "learning_rate": 2.471864826318893e-09, "loss": 0.1303, "step": 12441 }, { "epoch": 0.9932146563423007, "grad_norm": 0.3504438994786956, "learning_rate": 2.4147161144150165e-09, "loss": 0.2371, "step": 12442 }, { "epoch": 0.9932944839147442, "grad_norm": 0.309318196958955, "learning_rate": 2.3582357015228265e-09, "loss": 0.154, "step": 12443 }, { "epoch": 0.9933743114871877, "grad_norm": 0.31429508183368293, "learning_rate": 2.30242359141708e-09, "loss": 0.1798, "step": 12444 }, { "epoch": 0.9934541390596312, "grad_norm": 0.29554769602728864, "learning_rate": 2.2472797878303477e-09, "loss": 0.196, "step": 12445 }, { "epoch": 0.9935339666320747, "grad_norm": 0.3145269738733131, "learning_rate": 2.192804294446349e-09, "loss": 0.1613, "step": 12446 }, { "epoch": 0.9936137942045182, "grad_norm": 0.2586936346510402, "learning_rate": 2.1389971149088362e-09, "loss": 0.1634, "step": 12447 }, { "epoch": 0.9936936217769617, "grad_norm": 0.2594634517981838, "learning_rate": 2.0858582528127114e-09, "loss": 0.1581, "step": 12448 }, { "epoch": 0.9937734493494053, "grad_norm": 0.2640373481306257, "learning_rate": 2.0333877117117982e-09, "loss": 0.2054, "step": 12449 }, { "epoch": 0.9938532769218488, "grad_norm": 0.2817682892092316, "learning_rate": 1.9815854951121817e-09, "loss": 0.1421, "step": 12450 }, { "epoch": 0.9939331044942923, "grad_norm": 0.26710400673589346, "learning_rate": 1.9304516064777566e-09, "loss": 0.1683, "step": 12451 }, { "epoch": 0.9940129320667358, "grad_norm": 0.2647133668927789, "learning_rate": 1.8799860492257906e-09, "loss": 0.1802, "step": 12452 }, { "epoch": 0.9940927596391794, "grad_norm": 0.24682645315126459, "learning_rate": 1.8301888267291401e-09, "loss": 0.1507, "step": 12453 }, { "epoch": 0.9941725872116229, "grad_norm": 0.267282350401298, "learning_rate": 1.781059942318475e-09, "loss": 0.1522, "step": 12454 }, { "epoch": 0.9942524147840665, "grad_norm": 0.35500590059908493, "learning_rate": 1.7325993992767242e-09, "loss": 0.1647, "step": 12455 }, { "epoch": 0.99433224235651, "grad_norm": 0.25676012500554873, "learning_rate": 1.684807200843519e-09, "loss": 0.1273, "step": 12456 }, { "epoch": 0.9944120699289535, "grad_norm": 0.2282909169739876, "learning_rate": 1.6376833502118605e-09, "loss": 0.1403, "step": 12457 }, { "epoch": 0.994491897501397, "grad_norm": 0.2802063737242042, "learning_rate": 1.5912278505347822e-09, "loss": 0.1769, "step": 12458 }, { "epoch": 0.9945717250738405, "grad_norm": 0.25787815637342854, "learning_rate": 1.5454407049164677e-09, "loss": 0.1789, "step": 12459 }, { "epoch": 0.994651552646284, "grad_norm": 0.2672648093442295, "learning_rate": 1.5003219164166916e-09, "loss": 0.1113, "step": 12460 }, { "epoch": 0.9947313802187275, "grad_norm": 0.24666093997688737, "learning_rate": 1.4558714880519298e-09, "loss": 0.1187, "step": 12461 }, { "epoch": 0.994811207791171, "grad_norm": 0.2888577478165622, "learning_rate": 1.4120894227931393e-09, "loss": 0.1642, "step": 12462 }, { "epoch": 0.9948910353636146, "grad_norm": 0.3467371909011357, "learning_rate": 1.3689757235690882e-09, "loss": 0.1093, "step": 12463 }, { "epoch": 0.9949708629360581, "grad_norm": 0.33446773932643736, "learning_rate": 1.3265303932585848e-09, "loss": 0.1486, "step": 12464 }, { "epoch": 0.9950506905085016, "grad_norm": 0.25124219663878844, "learning_rate": 1.28475343470158e-09, "loss": 0.1366, "step": 12465 }, { "epoch": 0.9951305180809451, "grad_norm": 0.2460798979789191, "learning_rate": 1.2436448506891742e-09, "loss": 0.1681, "step": 12466 }, { "epoch": 0.9952103456533887, "grad_norm": 0.2709533383307475, "learning_rate": 1.2032046439702793e-09, "loss": 0.146, "step": 12467 }, { "epoch": 0.9952901732258322, "grad_norm": 0.3029938069567742, "learning_rate": 1.1634328172471786e-09, "loss": 0.1926, "step": 12468 }, { "epoch": 0.9953700007982758, "grad_norm": 0.26836460892728053, "learning_rate": 1.1243293731799665e-09, "loss": 0.1507, "step": 12469 }, { "epoch": 0.9954498283707193, "grad_norm": 0.3000551030965867, "learning_rate": 1.0858943143809975e-09, "loss": 0.2035, "step": 12470 }, { "epoch": 0.9955296559431628, "grad_norm": 0.3376046684140317, "learning_rate": 1.0481276434193277e-09, "loss": 0.1897, "step": 12471 }, { "epoch": 0.9956094835156063, "grad_norm": 0.28798627977519675, "learning_rate": 1.0110293628207146e-09, "loss": 0.1331, "step": 12472 }, { "epoch": 0.9956893110880498, "grad_norm": 0.26810883312561556, "learning_rate": 9.745994750653964e-10, "loss": 0.1749, "step": 12473 }, { "epoch": 0.9957691386604933, "grad_norm": 0.3330053501683471, "learning_rate": 9.38837982586982e-10, "loss": 0.1374, "step": 12474 }, { "epoch": 0.9958489662329368, "grad_norm": 0.2978271607466382, "learning_rate": 9.037448877768917e-10, "loss": 0.1632, "step": 12475 }, { "epoch": 0.9959287938053804, "grad_norm": 0.29825928974273047, "learning_rate": 8.693201929810269e-10, "loss": 0.1352, "step": 12476 }, { "epoch": 0.9960086213778239, "grad_norm": 0.28632494743527687, "learning_rate": 8.355639004997696e-10, "loss": 0.1623, "step": 12477 }, { "epoch": 0.9960884489502674, "grad_norm": 0.28317530860924056, "learning_rate": 8.024760125902031e-10, "loss": 0.165, "step": 12478 }, { "epoch": 0.9961682765227109, "grad_norm": 0.2904711830938501, "learning_rate": 7.700565314638919e-10, "loss": 0.1927, "step": 12479 }, { "epoch": 0.9962481040951545, "grad_norm": 0.26918504906102275, "learning_rate": 7.383054592891015e-10, "loss": 0.1563, "step": 12480 }, { "epoch": 0.996327931667598, "grad_norm": 0.33987125214627534, "learning_rate": 7.072227981863577e-10, "loss": 0.2535, "step": 12481 }, { "epoch": 0.9964077592400415, "grad_norm": 0.29401453140048717, "learning_rate": 6.768085502351085e-10, "loss": 0.1634, "step": 12482 }, { "epoch": 0.9964875868124851, "grad_norm": 0.3117549490902915, "learning_rate": 6.470627174670619e-10, "loss": 0.1675, "step": 12483 }, { "epoch": 0.9965674143849286, "grad_norm": 0.2689180964969069, "learning_rate": 6.179853018717375e-10, "loss": 0.1242, "step": 12484 }, { "epoch": 0.9966472419573721, "grad_norm": 0.29404329342359076, "learning_rate": 5.895763053920256e-10, "loss": 0.1883, "step": 12485 }, { "epoch": 0.9967270695298156, "grad_norm": 0.22456636753931003, "learning_rate": 5.618357299264077e-10, "loss": 0.1425, "step": 12486 }, { "epoch": 0.9968068971022591, "grad_norm": 0.2681433613732099, "learning_rate": 5.347635773311766e-10, "loss": 0.1395, "step": 12487 }, { "epoch": 0.9968867246747026, "grad_norm": 0.27022685650081124, "learning_rate": 5.083598494148856e-10, "loss": 0.1543, "step": 12488 }, { "epoch": 0.9969665522471461, "grad_norm": 0.28388593764615366, "learning_rate": 4.826245479416792e-10, "loss": 0.1447, "step": 12489 }, { "epoch": 0.9970463798195897, "grad_norm": 0.3328991490213581, "learning_rate": 4.575576746335131e-10, "loss": 0.1661, "step": 12490 }, { "epoch": 0.9971262073920332, "grad_norm": 0.27343145420397724, "learning_rate": 4.33159231165714e-10, "loss": 0.1819, "step": 12491 }, { "epoch": 0.9972060349644767, "grad_norm": 0.2967648824593527, "learning_rate": 4.0942921916697907e-10, "loss": 0.1808, "step": 12492 }, { "epoch": 0.9972858625369202, "grad_norm": 0.26923557842864976, "learning_rate": 3.863676402271477e-10, "loss": 0.1141, "step": 12493 }, { "epoch": 0.9973656901093638, "grad_norm": 0.2979387856508881, "learning_rate": 3.639744958849889e-10, "loss": 0.1972, "step": 12494 }, { "epoch": 0.9974455176818073, "grad_norm": 0.2715791479752598, "learning_rate": 3.422497876381936e-10, "loss": 0.1451, "step": 12495 }, { "epoch": 0.9975253452542509, "grad_norm": 0.3558598895852963, "learning_rate": 3.2119351693893353e-10, "loss": 0.1563, "step": 12496 }, { "epoch": 0.9976051728266944, "grad_norm": 0.2730944020788026, "learning_rate": 3.0080568519608164e-10, "loss": 0.1617, "step": 12497 }, { "epoch": 0.9976850003991379, "grad_norm": 0.2676116464851507, "learning_rate": 2.810862937696612e-10, "loss": 0.1347, "step": 12498 }, { "epoch": 0.9977648279715814, "grad_norm": 0.31307542861152526, "learning_rate": 2.6203534398083764e-10, "loss": 0.153, "step": 12499 }, { "epoch": 0.9978446555440249, "grad_norm": 0.2993216298927833, "learning_rate": 2.436528371019264e-10, "loss": 0.1851, "step": 12500 }, { "epoch": 0.9979244831164684, "grad_norm": 0.2549904714919623, "learning_rate": 2.2593877436083433e-10, "loss": 0.1547, "step": 12501 }, { "epoch": 0.9980043106889119, "grad_norm": 0.28901188532169975, "learning_rate": 2.0889315694216927e-10, "loss": 0.2404, "step": 12502 }, { "epoch": 0.9980841382613554, "grad_norm": 0.24069022009712054, "learning_rate": 1.925159859861303e-10, "loss": 0.1515, "step": 12503 }, { "epoch": 0.998163965833799, "grad_norm": 0.312036767553585, "learning_rate": 1.7680726258739733e-10, "loss": 0.1895, "step": 12504 }, { "epoch": 0.9982437934062425, "grad_norm": 0.2856560363391939, "learning_rate": 1.617669877951311e-10, "loss": 0.1414, "step": 12505 }, { "epoch": 0.998323620978686, "grad_norm": 0.27169484633803703, "learning_rate": 1.473951626140835e-10, "loss": 0.1404, "step": 12506 }, { "epoch": 0.9984034485511296, "grad_norm": 0.266150983882747, "learning_rate": 1.3369178800792804e-10, "loss": 0.1112, "step": 12507 }, { "epoch": 0.9984832761235731, "grad_norm": 0.31235239716492397, "learning_rate": 1.206568648892681e-10, "loss": 0.1583, "step": 12508 }, { "epoch": 0.9985631036960166, "grad_norm": 0.28792504274615893, "learning_rate": 1.0829039413184917e-10, "loss": 0.1701, "step": 12509 }, { "epoch": 0.9986429312684602, "grad_norm": 0.2872262507835265, "learning_rate": 9.659237656056696e-11, "loss": 0.1457, "step": 12510 }, { "epoch": 0.9987227588409037, "grad_norm": 0.3115980251290437, "learning_rate": 8.556281295923896e-11, "loss": 0.1402, "step": 12511 }, { "epoch": 0.9988025864133472, "grad_norm": 0.27616924426253137, "learning_rate": 7.5201704063943e-11, "loss": 0.1546, "step": 12512 }, { "epoch": 0.9988824139857907, "grad_norm": 0.31688432205969586, "learning_rate": 6.550905056745827e-11, "loss": 0.1848, "step": 12513 }, { "epoch": 0.9989622415582342, "grad_norm": 0.26672396609119514, "learning_rate": 5.648485311704477e-11, "loss": 0.1896, "step": 12514 }, { "epoch": 0.9990420691306777, "grad_norm": 0.25687266752273974, "learning_rate": 4.812911231666384e-11, "loss": 0.1313, "step": 12515 }, { "epoch": 0.9991218967031212, "grad_norm": 0.2761674304316039, "learning_rate": 4.044182872586788e-11, "loss": 0.1486, "step": 12516 }, { "epoch": 0.9992017242755648, "grad_norm": 0.3139495609621679, "learning_rate": 3.342300285646971e-11, "loss": 0.1631, "step": 12517 }, { "epoch": 0.9992815518480083, "grad_norm": 0.29359367756157195, "learning_rate": 2.7072635179203887e-11, "loss": 0.1264, "step": 12518 }, { "epoch": 0.9993613794204518, "grad_norm": 0.2689772451887055, "learning_rate": 2.139072611817561e-11, "loss": 0.1058, "step": 12519 }, { "epoch": 0.9994412069928953, "grad_norm": 0.26309402532374065, "learning_rate": 1.6377276051970925e-11, "loss": 0.172, "step": 12520 }, { "epoch": 0.9995210345653389, "grad_norm": 0.23880420243102574, "learning_rate": 1.2032285318097636e-11, "loss": 0.196, "step": 12521 }, { "epoch": 0.9996008621377824, "grad_norm": 0.32068045681164814, "learning_rate": 8.355754205213728e-12, "loss": 0.1787, "step": 12522 }, { "epoch": 0.999680689710226, "grad_norm": 0.2773332790051744, "learning_rate": 5.347682959788714e-12, "loss": 0.1838, "step": 12523 }, { "epoch": 0.9997605172826695, "grad_norm": 0.24685031793586695, "learning_rate": 3.008071781662736e-12, "loss": 0.12, "step": 12524 }, { "epoch": 0.999840344855113, "grad_norm": 0.3263037939322937, "learning_rate": 1.3369208295976877e-12, "loss": 0.1679, "step": 12525 }, { "epoch": 0.9999201724275565, "grad_norm": 0.33420945217929404, "learning_rate": 3.342302123954255e-13, "loss": 0.1838, "step": 12526 }, { "epoch": 1.0, "grad_norm": 0.26993676223335955, "learning_rate": 0.0, "loss": 0.1845, "step": 12527 }, { "epoch": 1.0, "step": 12527, "total_flos": 2.052542836703232e+16, "train_loss": 0.23671708108624276, "train_runtime": 93664.0947, "train_samples_per_second": 17.119, "train_steps_per_second": 0.134 } ], "logging_steps": 1.0, "max_steps": 12527, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.052542836703232e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }