{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.99916130835896, "eval_steps": 500, "global_step": 3576, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008386916410399776, "grad_norm": 6.255380832306185, "learning_rate": 2.793296089385475e-08, "loss": 0.809, "step": 1 }, { "epoch": 0.0016773832820799553, "grad_norm": 6.355460668904089, "learning_rate": 5.58659217877095e-08, "loss": 0.826, "step": 2 }, { "epoch": 0.002516074923119933, "grad_norm": 6.049309331789211, "learning_rate": 8.379888268156426e-08, "loss": 0.8006, "step": 3 }, { "epoch": 0.0033547665641599105, "grad_norm": 6.192088217393975, "learning_rate": 1.11731843575419e-07, "loss": 0.8085, "step": 4 }, { "epoch": 0.004193458205199888, "grad_norm": 5.900964503590395, "learning_rate": 1.3966480446927375e-07, "loss": 0.7796, "step": 5 }, { "epoch": 0.005032149846239866, "grad_norm": 5.955651937489644, "learning_rate": 1.6759776536312851e-07, "loss": 0.793, "step": 6 }, { "epoch": 0.005870841487279843, "grad_norm": 6.13853152747071, "learning_rate": 1.9553072625698325e-07, "loss": 0.8012, "step": 7 }, { "epoch": 0.006709533128319821, "grad_norm": 6.129594478798253, "learning_rate": 2.23463687150838e-07, "loss": 0.7975, "step": 8 }, { "epoch": 0.007548224769359799, "grad_norm": 5.886754156545561, "learning_rate": 2.5139664804469275e-07, "loss": 0.773, "step": 9 }, { "epoch": 0.008386916410399776, "grad_norm": 6.130150024168805, "learning_rate": 2.793296089385475e-07, "loss": 0.8215, "step": 10 }, { "epoch": 0.009225608051439753, "grad_norm": 6.058651585592474, "learning_rate": 3.0726256983240227e-07, "loss": 0.7785, "step": 11 }, { "epoch": 0.010064299692479732, "grad_norm": 6.090708157235268, "learning_rate": 3.3519553072625703e-07, "loss": 0.7975, "step": 12 }, { "epoch": 0.01090299133351971, "grad_norm": 6.2076740028524044, "learning_rate": 3.631284916201118e-07, "loss": 0.8197, "step": 13 }, { "epoch": 0.011741682974559686, "grad_norm": 5.821150420669324, "learning_rate": 3.910614525139665e-07, "loss": 0.8031, "step": 14 }, { "epoch": 0.012580374615599665, "grad_norm": 5.613001576722442, "learning_rate": 4.1899441340782126e-07, "loss": 0.7692, "step": 15 }, { "epoch": 0.013419066256639642, "grad_norm": 5.724275271366094, "learning_rate": 4.46927374301676e-07, "loss": 0.7732, "step": 16 }, { "epoch": 0.01425775789767962, "grad_norm": 5.7740926756061475, "learning_rate": 4.7486033519553073e-07, "loss": 0.7809, "step": 17 }, { "epoch": 0.015096449538719598, "grad_norm": 5.837800208275915, "learning_rate": 5.027932960893855e-07, "loss": 0.8055, "step": 18 }, { "epoch": 0.015935141179759575, "grad_norm": 4.609317166431601, "learning_rate": 5.307262569832403e-07, "loss": 0.7174, "step": 19 }, { "epoch": 0.016773832820799552, "grad_norm": 4.61893309261793, "learning_rate": 5.58659217877095e-07, "loss": 0.7236, "step": 20 }, { "epoch": 0.01761252446183953, "grad_norm": 4.6175434057967335, "learning_rate": 5.865921787709498e-07, "loss": 0.717, "step": 21 }, { "epoch": 0.018451216102879506, "grad_norm": 4.699301213186183, "learning_rate": 6.145251396648045e-07, "loss": 0.7369, "step": 22 }, { "epoch": 0.019289907743919487, "grad_norm": 4.554045661915634, "learning_rate": 6.424581005586592e-07, "loss": 0.7301, "step": 23 }, { "epoch": 0.020128599384959464, "grad_norm": 4.4800065641610125, "learning_rate": 6.703910614525141e-07, "loss": 0.7295, "step": 24 }, { "epoch": 0.02096729102599944, "grad_norm": 3.2937870052738303, "learning_rate": 6.983240223463688e-07, "loss": 0.7059, "step": 25 }, { "epoch": 0.02180598266703942, "grad_norm": 2.596429525769539, "learning_rate": 7.262569832402236e-07, "loss": 0.6654, "step": 26 }, { "epoch": 0.022644674308079395, "grad_norm": 2.607591675186506, "learning_rate": 7.541899441340783e-07, "loss": 0.662, "step": 27 }, { "epoch": 0.023483365949119372, "grad_norm": 2.536803182747453, "learning_rate": 7.82122905027933e-07, "loss": 0.6844, "step": 28 }, { "epoch": 0.024322057590159353, "grad_norm": 2.438132239334011, "learning_rate": 8.100558659217877e-07, "loss": 0.6678, "step": 29 }, { "epoch": 0.02516074923119933, "grad_norm": 2.5719736909181794, "learning_rate": 8.379888268156425e-07, "loss": 0.689, "step": 30 }, { "epoch": 0.025999440872239307, "grad_norm": 2.3274108168192744, "learning_rate": 8.659217877094973e-07, "loss": 0.6754, "step": 31 }, { "epoch": 0.026838132513279284, "grad_norm": 2.1954977438072394, "learning_rate": 8.93854748603352e-07, "loss": 0.6602, "step": 32 }, { "epoch": 0.02767682415431926, "grad_norm": 1.9842746940466578, "learning_rate": 9.217877094972068e-07, "loss": 0.6675, "step": 33 }, { "epoch": 0.02851551579535924, "grad_norm": 1.648597874855098, "learning_rate": 9.497206703910615e-07, "loss": 0.6338, "step": 34 }, { "epoch": 0.029354207436399216, "grad_norm": 1.8682544181492062, "learning_rate": 9.776536312849163e-07, "loss": 0.6561, "step": 35 }, { "epoch": 0.030192899077439196, "grad_norm": 2.0334435108960194, "learning_rate": 1.005586592178771e-06, "loss": 0.6185, "step": 36 }, { "epoch": 0.031031590718479173, "grad_norm": 2.1177187176512033, "learning_rate": 1.033519553072626e-06, "loss": 0.6334, "step": 37 }, { "epoch": 0.03187028235951915, "grad_norm": 2.2505645389261555, "learning_rate": 1.0614525139664806e-06, "loss": 0.6247, "step": 38 }, { "epoch": 0.03270897400055913, "grad_norm": 2.128981251237759, "learning_rate": 1.0893854748603353e-06, "loss": 0.6144, "step": 39 }, { "epoch": 0.033547665641599105, "grad_norm": 2.1016520056815833, "learning_rate": 1.11731843575419e-06, "loss": 0.6044, "step": 40 }, { "epoch": 0.034386357282639085, "grad_norm": 2.1258726882591636, "learning_rate": 1.1452513966480447e-06, "loss": 0.6298, "step": 41 }, { "epoch": 0.03522504892367906, "grad_norm": 1.7379323879334034, "learning_rate": 1.1731843575418997e-06, "loss": 0.5932, "step": 42 }, { "epoch": 0.03606374056471904, "grad_norm": 1.5486847910865544, "learning_rate": 1.2011173184357544e-06, "loss": 0.5888, "step": 43 }, { "epoch": 0.03690243220575901, "grad_norm": 1.3081594738928055, "learning_rate": 1.229050279329609e-06, "loss": 0.6077, "step": 44 }, { "epoch": 0.03774112384679899, "grad_norm": 1.1079097427476334, "learning_rate": 1.2569832402234638e-06, "loss": 0.6179, "step": 45 }, { "epoch": 0.038579815487838974, "grad_norm": 1.0254552770233207, "learning_rate": 1.2849162011173185e-06, "loss": 0.5347, "step": 46 }, { "epoch": 0.03941850712887895, "grad_norm": 1.038456215832694, "learning_rate": 1.3128491620111732e-06, "loss": 0.562, "step": 47 }, { "epoch": 0.04025719876991893, "grad_norm": 1.0519439276713827, "learning_rate": 1.3407821229050281e-06, "loss": 0.5424, "step": 48 }, { "epoch": 0.0410958904109589, "grad_norm": 0.9581049037900412, "learning_rate": 1.3687150837988828e-06, "loss": 0.5619, "step": 49 }, { "epoch": 0.04193458205199888, "grad_norm": 0.9570978740746406, "learning_rate": 1.3966480446927375e-06, "loss": 0.525, "step": 50 }, { "epoch": 0.042773273693038856, "grad_norm": 0.8234631049273807, "learning_rate": 1.4245810055865922e-06, "loss": 0.5479, "step": 51 }, { "epoch": 0.04361196533407884, "grad_norm": 0.7473905694058951, "learning_rate": 1.4525139664804472e-06, "loss": 0.5311, "step": 52 }, { "epoch": 0.04445065697511882, "grad_norm": 0.7798367855546835, "learning_rate": 1.4804469273743019e-06, "loss": 0.5632, "step": 53 }, { "epoch": 0.04528934861615879, "grad_norm": 0.8801598211559716, "learning_rate": 1.5083798882681566e-06, "loss": 0.5553, "step": 54 }, { "epoch": 0.04612804025719877, "grad_norm": 0.6900908109322089, "learning_rate": 1.5363128491620113e-06, "loss": 0.5205, "step": 55 }, { "epoch": 0.046966731898238745, "grad_norm": 0.7530222059699374, "learning_rate": 1.564245810055866e-06, "loss": 0.532, "step": 56 }, { "epoch": 0.047805423539278726, "grad_norm": 0.6947014501481511, "learning_rate": 1.5921787709497207e-06, "loss": 0.5188, "step": 57 }, { "epoch": 0.048644115180318706, "grad_norm": 0.6417872312143463, "learning_rate": 1.6201117318435754e-06, "loss": 0.5427, "step": 58 }, { "epoch": 0.04948280682135868, "grad_norm": 0.5917146970492029, "learning_rate": 1.6480446927374303e-06, "loss": 0.505, "step": 59 }, { "epoch": 0.05032149846239866, "grad_norm": 0.6163899046588881, "learning_rate": 1.675977653631285e-06, "loss": 0.5196, "step": 60 }, { "epoch": 0.051160190103438634, "grad_norm": 0.6218297717086538, "learning_rate": 1.7039106145251397e-06, "loss": 0.5128, "step": 61 }, { "epoch": 0.051998881744478614, "grad_norm": 0.7192114399276618, "learning_rate": 1.7318435754189947e-06, "loss": 0.5146, "step": 62 }, { "epoch": 0.05283757338551859, "grad_norm": 0.6108292048142744, "learning_rate": 1.7597765363128494e-06, "loss": 0.541, "step": 63 }, { "epoch": 0.05367626502655857, "grad_norm": 0.5799506091991435, "learning_rate": 1.787709497206704e-06, "loss": 0.5087, "step": 64 }, { "epoch": 0.05451495666759855, "grad_norm": 0.5091333704204054, "learning_rate": 1.8156424581005588e-06, "loss": 0.5225, "step": 65 }, { "epoch": 0.05535364830863852, "grad_norm": 0.5172189689376188, "learning_rate": 1.8435754189944135e-06, "loss": 0.4904, "step": 66 }, { "epoch": 0.0561923399496785, "grad_norm": 0.5628243457440915, "learning_rate": 1.8715083798882682e-06, "loss": 0.4814, "step": 67 }, { "epoch": 0.05703103159071848, "grad_norm": 0.5511097528514916, "learning_rate": 1.899441340782123e-06, "loss": 0.5168, "step": 68 }, { "epoch": 0.05786972323175846, "grad_norm": 0.5991844067227475, "learning_rate": 1.927374301675978e-06, "loss": 0.5134, "step": 69 }, { "epoch": 0.05870841487279843, "grad_norm": 0.5844828529030837, "learning_rate": 1.9553072625698325e-06, "loss": 0.4878, "step": 70 }, { "epoch": 0.05954710651383841, "grad_norm": 0.507280649345282, "learning_rate": 1.9832402234636873e-06, "loss": 0.4876, "step": 71 }, { "epoch": 0.06038579815487839, "grad_norm": 0.48346508210719336, "learning_rate": 2.011173184357542e-06, "loss": 0.4563, "step": 72 }, { "epoch": 0.061224489795918366, "grad_norm": 0.542092151901589, "learning_rate": 2.039106145251397e-06, "loss": 0.5078, "step": 73 }, { "epoch": 0.062063181436958346, "grad_norm": 0.5664275049214952, "learning_rate": 2.067039106145252e-06, "loss": 0.4997, "step": 74 }, { "epoch": 0.06290187307799833, "grad_norm": 0.4677844761612876, "learning_rate": 2.0949720670391065e-06, "loss": 0.4917, "step": 75 }, { "epoch": 0.0637405647190383, "grad_norm": 0.47828952394983093, "learning_rate": 2.1229050279329612e-06, "loss": 0.4838, "step": 76 }, { "epoch": 0.06457925636007827, "grad_norm": 0.4811066051395582, "learning_rate": 2.150837988826816e-06, "loss": 0.4991, "step": 77 }, { "epoch": 0.06541794800111826, "grad_norm": 0.4669716426695699, "learning_rate": 2.1787709497206706e-06, "loss": 0.4872, "step": 78 }, { "epoch": 0.06625663964215824, "grad_norm": 0.5294684944901448, "learning_rate": 2.2067039106145253e-06, "loss": 0.4906, "step": 79 }, { "epoch": 0.06709533128319821, "grad_norm": 0.48741063728743683, "learning_rate": 2.23463687150838e-06, "loss": 0.4934, "step": 80 }, { "epoch": 0.06793402292423818, "grad_norm": 0.4923512136993158, "learning_rate": 2.2625698324022348e-06, "loss": 0.4756, "step": 81 }, { "epoch": 0.06877271456527817, "grad_norm": 0.4787855098075051, "learning_rate": 2.2905027932960895e-06, "loss": 0.4692, "step": 82 }, { "epoch": 0.06961140620631814, "grad_norm": 0.43551980925333617, "learning_rate": 2.318435754189944e-06, "loss": 0.5207, "step": 83 }, { "epoch": 0.07045009784735812, "grad_norm": 0.49763213193493955, "learning_rate": 2.3463687150837993e-06, "loss": 0.4661, "step": 84 }, { "epoch": 0.0712887894883981, "grad_norm": 0.47426788660609187, "learning_rate": 2.374301675977654e-06, "loss": 0.4986, "step": 85 }, { "epoch": 0.07212748112943808, "grad_norm": 0.4989314059733776, "learning_rate": 2.4022346368715087e-06, "loss": 0.4685, "step": 86 }, { "epoch": 0.07296617277047805, "grad_norm": 0.4348724044509492, "learning_rate": 2.4301675977653634e-06, "loss": 0.4544, "step": 87 }, { "epoch": 0.07380486441151803, "grad_norm": 0.45497648709409216, "learning_rate": 2.458100558659218e-06, "loss": 0.4574, "step": 88 }, { "epoch": 0.07464355605255801, "grad_norm": 0.4584494221098043, "learning_rate": 2.486033519553073e-06, "loss": 0.4676, "step": 89 }, { "epoch": 0.07548224769359799, "grad_norm": 0.4483722319587477, "learning_rate": 2.5139664804469276e-06, "loss": 0.455, "step": 90 }, { "epoch": 0.07632093933463796, "grad_norm": 0.45415131439248013, "learning_rate": 2.5418994413407823e-06, "loss": 0.472, "step": 91 }, { "epoch": 0.07715963097567795, "grad_norm": 0.4412839302919759, "learning_rate": 2.569832402234637e-06, "loss": 0.4571, "step": 92 }, { "epoch": 0.07799832261671792, "grad_norm": 0.4837261156152467, "learning_rate": 2.5977653631284917e-06, "loss": 0.4792, "step": 93 }, { "epoch": 0.0788370142577579, "grad_norm": 0.4404475105708723, "learning_rate": 2.6256983240223464e-06, "loss": 0.4802, "step": 94 }, { "epoch": 0.07967570589879787, "grad_norm": 0.44666835494180285, "learning_rate": 2.6536312849162015e-06, "loss": 0.4752, "step": 95 }, { "epoch": 0.08051439753983786, "grad_norm": 0.461496116618733, "learning_rate": 2.6815642458100562e-06, "loss": 0.4673, "step": 96 }, { "epoch": 0.08135308918087783, "grad_norm": 0.469205996273289, "learning_rate": 2.709497206703911e-06, "loss": 0.4767, "step": 97 }, { "epoch": 0.0821917808219178, "grad_norm": 0.4225799582959266, "learning_rate": 2.7374301675977656e-06, "loss": 0.4377, "step": 98 }, { "epoch": 0.08303047246295779, "grad_norm": 0.4874273891518889, "learning_rate": 2.7653631284916204e-06, "loss": 0.4697, "step": 99 }, { "epoch": 0.08386916410399776, "grad_norm": 0.444659501589986, "learning_rate": 2.793296089385475e-06, "loss": 0.4483, "step": 100 }, { "epoch": 0.08470785574503774, "grad_norm": 0.4099217391078781, "learning_rate": 2.8212290502793298e-06, "loss": 0.4389, "step": 101 }, { "epoch": 0.08554654738607771, "grad_norm": 0.43010024681103226, "learning_rate": 2.8491620111731845e-06, "loss": 0.4766, "step": 102 }, { "epoch": 0.0863852390271177, "grad_norm": 0.4450722896563478, "learning_rate": 2.877094972067039e-06, "loss": 0.484, "step": 103 }, { "epoch": 0.08722393066815767, "grad_norm": 0.41794983919004813, "learning_rate": 2.9050279329608943e-06, "loss": 0.4405, "step": 104 }, { "epoch": 0.08806262230919765, "grad_norm": 0.46904435161800273, "learning_rate": 2.9329608938547486e-06, "loss": 0.5091, "step": 105 }, { "epoch": 0.08890131395023763, "grad_norm": 0.47305621412286347, "learning_rate": 2.9608938547486037e-06, "loss": 0.4458, "step": 106 }, { "epoch": 0.08974000559127761, "grad_norm": 0.45880509785123713, "learning_rate": 2.9888268156424584e-06, "loss": 0.4547, "step": 107 }, { "epoch": 0.09057869723231758, "grad_norm": 0.44866465019683704, "learning_rate": 3.016759776536313e-06, "loss": 0.4771, "step": 108 }, { "epoch": 0.09141738887335757, "grad_norm": 0.4272285247705359, "learning_rate": 3.044692737430168e-06, "loss": 0.4545, "step": 109 }, { "epoch": 0.09225608051439754, "grad_norm": 0.45475241584336024, "learning_rate": 3.0726256983240226e-06, "loss": 0.4643, "step": 110 }, { "epoch": 0.09309477215543752, "grad_norm": 0.49761100633477556, "learning_rate": 3.1005586592178773e-06, "loss": 0.482, "step": 111 }, { "epoch": 0.09393346379647749, "grad_norm": 0.4720043626388274, "learning_rate": 3.128491620111732e-06, "loss": 0.4557, "step": 112 }, { "epoch": 0.09477215543751748, "grad_norm": 0.4293503979672631, "learning_rate": 3.1564245810055867e-06, "loss": 0.4502, "step": 113 }, { "epoch": 0.09561084707855745, "grad_norm": 0.4961714085326301, "learning_rate": 3.1843575418994414e-06, "loss": 0.4426, "step": 114 }, { "epoch": 0.09644953871959742, "grad_norm": 0.4702356387982089, "learning_rate": 3.2122905027932965e-06, "loss": 0.4537, "step": 115 }, { "epoch": 0.09728823036063741, "grad_norm": 0.41998566472465526, "learning_rate": 3.240223463687151e-06, "loss": 0.4384, "step": 116 }, { "epoch": 0.09812692200167739, "grad_norm": 0.46830717175557857, "learning_rate": 3.268156424581006e-06, "loss": 0.4514, "step": 117 }, { "epoch": 0.09896561364271736, "grad_norm": 0.42080000269307083, "learning_rate": 3.2960893854748607e-06, "loss": 0.4383, "step": 118 }, { "epoch": 0.09980430528375733, "grad_norm": 0.4540315120173948, "learning_rate": 3.3240223463687154e-06, "loss": 0.4529, "step": 119 }, { "epoch": 0.10064299692479732, "grad_norm": 0.4882435832548587, "learning_rate": 3.35195530726257e-06, "loss": 0.4421, "step": 120 }, { "epoch": 0.1014816885658373, "grad_norm": 0.4590853734842999, "learning_rate": 3.3798882681564248e-06, "loss": 0.4303, "step": 121 }, { "epoch": 0.10232038020687727, "grad_norm": 0.46062784702191495, "learning_rate": 3.4078212290502795e-06, "loss": 0.4403, "step": 122 }, { "epoch": 0.10315907184791726, "grad_norm": 0.4454667141447797, "learning_rate": 3.435754189944134e-06, "loss": 0.4401, "step": 123 }, { "epoch": 0.10399776348895723, "grad_norm": 0.43449565662259, "learning_rate": 3.4636871508379893e-06, "loss": 0.4433, "step": 124 }, { "epoch": 0.1048364551299972, "grad_norm": 0.5016415456969524, "learning_rate": 3.4916201117318436e-06, "loss": 0.4527, "step": 125 }, { "epoch": 0.10567514677103718, "grad_norm": 0.5281465078488423, "learning_rate": 3.5195530726256988e-06, "loss": 0.4644, "step": 126 }, { "epoch": 0.10651383841207716, "grad_norm": 0.5092893023417355, "learning_rate": 3.547486033519553e-06, "loss": 0.4403, "step": 127 }, { "epoch": 0.10735253005311714, "grad_norm": 0.4398345633876836, "learning_rate": 3.575418994413408e-06, "loss": 0.4403, "step": 128 }, { "epoch": 0.10819122169415711, "grad_norm": 0.4141826514411364, "learning_rate": 3.603351955307263e-06, "loss": 0.4339, "step": 129 }, { "epoch": 0.1090299133351971, "grad_norm": 0.46969025488068283, "learning_rate": 3.6312849162011176e-06, "loss": 0.4583, "step": 130 }, { "epoch": 0.10986860497623707, "grad_norm": 0.4427791128511243, "learning_rate": 3.6592178770949723e-06, "loss": 0.4431, "step": 131 }, { "epoch": 0.11070729661727705, "grad_norm": 0.41775808325521974, "learning_rate": 3.687150837988827e-06, "loss": 0.4383, "step": 132 }, { "epoch": 0.11154598825831702, "grad_norm": 0.43827461679152735, "learning_rate": 3.715083798882682e-06, "loss": 0.4522, "step": 133 }, { "epoch": 0.112384679899357, "grad_norm": 0.4152975554771985, "learning_rate": 3.7430167597765364e-06, "loss": 0.4149, "step": 134 }, { "epoch": 0.11322337154039698, "grad_norm": 0.4732954685078655, "learning_rate": 3.7709497206703915e-06, "loss": 0.449, "step": 135 }, { "epoch": 0.11406206318143695, "grad_norm": 0.43813339217443215, "learning_rate": 3.798882681564246e-06, "loss": 0.4318, "step": 136 }, { "epoch": 0.11490075482247694, "grad_norm": 0.4571314496872763, "learning_rate": 3.826815642458101e-06, "loss": 0.4499, "step": 137 }, { "epoch": 0.11573944646351692, "grad_norm": 0.4628970333532903, "learning_rate": 3.854748603351956e-06, "loss": 0.458, "step": 138 }, { "epoch": 0.11657813810455689, "grad_norm": 0.43055843459719123, "learning_rate": 3.88268156424581e-06, "loss": 0.4385, "step": 139 }, { "epoch": 0.11741682974559686, "grad_norm": 0.46586964482265436, "learning_rate": 3.910614525139665e-06, "loss": 0.4492, "step": 140 }, { "epoch": 0.11825552138663685, "grad_norm": 0.5347361885831022, "learning_rate": 3.93854748603352e-06, "loss": 0.4483, "step": 141 }, { "epoch": 0.11909421302767682, "grad_norm": 0.4382043807718099, "learning_rate": 3.9664804469273745e-06, "loss": 0.4408, "step": 142 }, { "epoch": 0.1199329046687168, "grad_norm": 0.4872537056662483, "learning_rate": 3.994413407821229e-06, "loss": 0.4331, "step": 143 }, { "epoch": 0.12077159630975678, "grad_norm": 0.48823342690279375, "learning_rate": 4.022346368715084e-06, "loss": 0.4346, "step": 144 }, { "epoch": 0.12161028795079676, "grad_norm": 0.4607850497986509, "learning_rate": 4.050279329608939e-06, "loss": 0.4268, "step": 145 }, { "epoch": 0.12244897959183673, "grad_norm": 0.46661645848683825, "learning_rate": 4.078212290502794e-06, "loss": 0.4457, "step": 146 }, { "epoch": 0.1232876712328767, "grad_norm": 0.42101781505431674, "learning_rate": 4.106145251396648e-06, "loss": 0.4381, "step": 147 }, { "epoch": 0.12412636287391669, "grad_norm": 0.4810844887212102, "learning_rate": 4.134078212290504e-06, "loss": 0.4618, "step": 148 }, { "epoch": 0.12496505451495667, "grad_norm": 0.45440114756537453, "learning_rate": 4.1620111731843575e-06, "loss": 0.4491, "step": 149 }, { "epoch": 0.12580374615599665, "grad_norm": 0.48801304589966993, "learning_rate": 4.189944134078213e-06, "loss": 0.4398, "step": 150 }, { "epoch": 0.12664243779703663, "grad_norm": 0.45062139672510776, "learning_rate": 4.217877094972068e-06, "loss": 0.4286, "step": 151 }, { "epoch": 0.1274811294380766, "grad_norm": 0.4946240847500636, "learning_rate": 4.2458100558659224e-06, "loss": 0.4406, "step": 152 }, { "epoch": 0.12831982107911657, "grad_norm": 0.5224317650211763, "learning_rate": 4.273743016759777e-06, "loss": 0.4418, "step": 153 }, { "epoch": 0.12915851272015655, "grad_norm": 0.49922599191835976, "learning_rate": 4.301675977653632e-06, "loss": 0.4263, "step": 154 }, { "epoch": 0.12999720436119652, "grad_norm": 0.4363699513671281, "learning_rate": 4.3296089385474866e-06, "loss": 0.4478, "step": 155 }, { "epoch": 0.13083589600223652, "grad_norm": 0.4782663133687644, "learning_rate": 4.357541899441341e-06, "loss": 0.4498, "step": 156 }, { "epoch": 0.1316745876432765, "grad_norm": 0.4853881678141438, "learning_rate": 4.385474860335196e-06, "loss": 0.4448, "step": 157 }, { "epoch": 0.13251327928431647, "grad_norm": 0.4583580055374569, "learning_rate": 4.413407821229051e-06, "loss": 0.4095, "step": 158 }, { "epoch": 0.13335197092535644, "grad_norm": 0.4883224291443686, "learning_rate": 4.441340782122905e-06, "loss": 0.4435, "step": 159 }, { "epoch": 0.13419066256639642, "grad_norm": 0.46490369686888455, "learning_rate": 4.46927374301676e-06, "loss": 0.4449, "step": 160 }, { "epoch": 0.1350293542074364, "grad_norm": 0.4574586975188319, "learning_rate": 4.497206703910615e-06, "loss": 0.4275, "step": 161 }, { "epoch": 0.13586804584847637, "grad_norm": 0.4583044829349935, "learning_rate": 4.5251396648044695e-06, "loss": 0.4133, "step": 162 }, { "epoch": 0.13670673748951637, "grad_norm": 0.45271556860344137, "learning_rate": 4.553072625698324e-06, "loss": 0.4364, "step": 163 }, { "epoch": 0.13754542913055634, "grad_norm": 0.4285806248148666, "learning_rate": 4.581005586592179e-06, "loss": 0.4239, "step": 164 }, { "epoch": 0.13838412077159631, "grad_norm": 0.46924857067705966, "learning_rate": 4.608938547486034e-06, "loss": 0.433, "step": 165 }, { "epoch": 0.1392228124126363, "grad_norm": 0.4659517024567275, "learning_rate": 4.636871508379888e-06, "loss": 0.4402, "step": 166 }, { "epoch": 0.14006150405367626, "grad_norm": 0.50931247237455, "learning_rate": 4.664804469273743e-06, "loss": 0.4354, "step": 167 }, { "epoch": 0.14090019569471623, "grad_norm": 0.46684723906160697, "learning_rate": 4.692737430167599e-06, "loss": 0.4174, "step": 168 }, { "epoch": 0.1417388873357562, "grad_norm": 0.4329055976249434, "learning_rate": 4.7206703910614525e-06, "loss": 0.4394, "step": 169 }, { "epoch": 0.1425775789767962, "grad_norm": 0.4868931290166039, "learning_rate": 4.748603351955308e-06, "loss": 0.4385, "step": 170 }, { "epoch": 0.14341627061783618, "grad_norm": 0.4507382521220108, "learning_rate": 4.776536312849163e-06, "loss": 0.452, "step": 171 }, { "epoch": 0.14425496225887616, "grad_norm": 0.4400970280400323, "learning_rate": 4.8044692737430175e-06, "loss": 0.4248, "step": 172 }, { "epoch": 0.14509365389991613, "grad_norm": 0.47387160204824874, "learning_rate": 4.832402234636872e-06, "loss": 0.4336, "step": 173 }, { "epoch": 0.1459323455409561, "grad_norm": 0.45141229239538827, "learning_rate": 4.860335195530727e-06, "loss": 0.4214, "step": 174 }, { "epoch": 0.14677103718199608, "grad_norm": 0.45348371512671043, "learning_rate": 4.8882681564245816e-06, "loss": 0.4302, "step": 175 }, { "epoch": 0.14760972882303605, "grad_norm": 0.4567886294802777, "learning_rate": 4.916201117318436e-06, "loss": 0.4384, "step": 176 }, { "epoch": 0.14844842046407605, "grad_norm": 0.4860708891507246, "learning_rate": 4.944134078212291e-06, "loss": 0.4233, "step": 177 }, { "epoch": 0.14928711210511603, "grad_norm": 0.472387177445158, "learning_rate": 4.972067039106146e-06, "loss": 0.4278, "step": 178 }, { "epoch": 0.150125803746156, "grad_norm": 0.4884673527350831, "learning_rate": 5e-06, "loss": 0.4356, "step": 179 }, { "epoch": 0.15096449538719597, "grad_norm": 0.4635645085750586, "learning_rate": 5.027932960893855e-06, "loss": 0.4345, "step": 180 }, { "epoch": 0.15180318702823595, "grad_norm": 0.48946671551618065, "learning_rate": 5.055865921787711e-06, "loss": 0.4278, "step": 181 }, { "epoch": 0.15264187866927592, "grad_norm": 0.4667155063536315, "learning_rate": 5.0837988826815645e-06, "loss": 0.3987, "step": 182 }, { "epoch": 0.1534805703103159, "grad_norm": 0.4420399604168888, "learning_rate": 5.111731843575419e-06, "loss": 0.4364, "step": 183 }, { "epoch": 0.1543192619513559, "grad_norm": 0.44588638964531807, "learning_rate": 5.139664804469274e-06, "loss": 0.3837, "step": 184 }, { "epoch": 0.15515795359239587, "grad_norm": 0.5234953300901197, "learning_rate": 5.1675977653631295e-06, "loss": 0.4271, "step": 185 }, { "epoch": 0.15599664523343584, "grad_norm": 0.4674916694004524, "learning_rate": 5.195530726256983e-06, "loss": 0.4045, "step": 186 }, { "epoch": 0.15683533687447582, "grad_norm": 0.4382272787682601, "learning_rate": 5.223463687150838e-06, "loss": 0.4259, "step": 187 }, { "epoch": 0.1576740285155158, "grad_norm": 0.4450750154111856, "learning_rate": 5.251396648044693e-06, "loss": 0.4257, "step": 188 }, { "epoch": 0.15851272015655576, "grad_norm": 0.5183549388720708, "learning_rate": 5.279329608938548e-06, "loss": 0.4295, "step": 189 }, { "epoch": 0.15935141179759574, "grad_norm": 0.5006380700163188, "learning_rate": 5.307262569832403e-06, "loss": 0.4247, "step": 190 }, { "epoch": 0.16019010343863574, "grad_norm": 0.500219699730586, "learning_rate": 5.335195530726257e-06, "loss": 0.4487, "step": 191 }, { "epoch": 0.1610287950796757, "grad_norm": 0.49312486587416143, "learning_rate": 5.3631284916201125e-06, "loss": 0.4401, "step": 192 }, { "epoch": 0.1618674867207157, "grad_norm": 0.5248194250249059, "learning_rate": 5.391061452513967e-06, "loss": 0.4401, "step": 193 }, { "epoch": 0.16270617836175566, "grad_norm": 0.5036229159553866, "learning_rate": 5.418994413407822e-06, "loss": 0.4248, "step": 194 }, { "epoch": 0.16354487000279563, "grad_norm": 0.49622524350212005, "learning_rate": 5.446927374301676e-06, "loss": 0.4184, "step": 195 }, { "epoch": 0.1643835616438356, "grad_norm": 0.49651305469954643, "learning_rate": 5.474860335195531e-06, "loss": 0.3994, "step": 196 }, { "epoch": 0.16522225328487558, "grad_norm": 0.4955036315340832, "learning_rate": 5.502793296089386e-06, "loss": 0.419, "step": 197 }, { "epoch": 0.16606094492591558, "grad_norm": 0.5005524583806275, "learning_rate": 5.530726256983241e-06, "loss": 0.4247, "step": 198 }, { "epoch": 0.16689963656695556, "grad_norm": 0.5076711768299723, "learning_rate": 5.558659217877096e-06, "loss": 0.3977, "step": 199 }, { "epoch": 0.16773832820799553, "grad_norm": 0.5590102668155178, "learning_rate": 5.58659217877095e-06, "loss": 0.423, "step": 200 }, { "epoch": 0.1685770198490355, "grad_norm": 0.4626209966955455, "learning_rate": 5.614525139664805e-06, "loss": 0.443, "step": 201 }, { "epoch": 0.16941571149007548, "grad_norm": 0.47906393513851936, "learning_rate": 5.6424581005586595e-06, "loss": 0.4195, "step": 202 }, { "epoch": 0.17025440313111545, "grad_norm": 0.5572596476027544, "learning_rate": 5.670391061452515e-06, "loss": 0.4037, "step": 203 }, { "epoch": 0.17109309477215542, "grad_norm": 0.5255637500614329, "learning_rate": 5.698324022346369e-06, "loss": 0.422, "step": 204 }, { "epoch": 0.17193178641319543, "grad_norm": 0.5207115838012798, "learning_rate": 5.726256983240224e-06, "loss": 0.4364, "step": 205 }, { "epoch": 0.1727704780542354, "grad_norm": 0.4573922167057304, "learning_rate": 5.754189944134078e-06, "loss": 0.4215, "step": 206 }, { "epoch": 0.17360916969527537, "grad_norm": 0.5046945968086317, "learning_rate": 5.782122905027934e-06, "loss": 0.4071, "step": 207 }, { "epoch": 0.17444786133631535, "grad_norm": 0.5103863648071226, "learning_rate": 5.810055865921789e-06, "loss": 0.4145, "step": 208 }, { "epoch": 0.17528655297735532, "grad_norm": 0.5308414060611591, "learning_rate": 5.8379888268156425e-06, "loss": 0.4385, "step": 209 }, { "epoch": 0.1761252446183953, "grad_norm": 0.6305744187357092, "learning_rate": 5.865921787709497e-06, "loss": 0.4249, "step": 210 }, { "epoch": 0.17696393625943527, "grad_norm": 0.5250301906002529, "learning_rate": 5.893854748603353e-06, "loss": 0.4411, "step": 211 }, { "epoch": 0.17780262790047527, "grad_norm": 0.5243402810011055, "learning_rate": 5.9217877094972075e-06, "loss": 0.4251, "step": 212 }, { "epoch": 0.17864131954151524, "grad_norm": 0.5394555935293267, "learning_rate": 5.949720670391061e-06, "loss": 0.3985, "step": 213 }, { "epoch": 0.17948001118255522, "grad_norm": 0.5189391819995739, "learning_rate": 5.977653631284917e-06, "loss": 0.4235, "step": 214 }, { "epoch": 0.1803187028235952, "grad_norm": 0.5079662921673935, "learning_rate": 6.005586592178772e-06, "loss": 0.4037, "step": 215 }, { "epoch": 0.18115739446463516, "grad_norm": 0.5246688853632202, "learning_rate": 6.033519553072626e-06, "loss": 0.4281, "step": 216 }, { "epoch": 0.18199608610567514, "grad_norm": 0.5022169607134624, "learning_rate": 6.061452513966482e-06, "loss": 0.4193, "step": 217 }, { "epoch": 0.18283477774671514, "grad_norm": 0.48646792383470355, "learning_rate": 6.089385474860336e-06, "loss": 0.4429, "step": 218 }, { "epoch": 0.1836734693877551, "grad_norm": 0.4974386705231177, "learning_rate": 6.1173184357541904e-06, "loss": 0.4147, "step": 219 }, { "epoch": 0.18451216102879509, "grad_norm": 0.4671751353413155, "learning_rate": 6.145251396648045e-06, "loss": 0.3841, "step": 220 }, { "epoch": 0.18535085266983506, "grad_norm": 0.4784257539366408, "learning_rate": 6.173184357541901e-06, "loss": 0.4314, "step": 221 }, { "epoch": 0.18618954431087503, "grad_norm": 0.5586777658889984, "learning_rate": 6.2011173184357546e-06, "loss": 0.4005, "step": 222 }, { "epoch": 0.187028235951915, "grad_norm": 0.4813535024092993, "learning_rate": 6.229050279329609e-06, "loss": 0.4007, "step": 223 }, { "epoch": 0.18786692759295498, "grad_norm": 0.5676989992918867, "learning_rate": 6.256983240223464e-06, "loss": 0.4167, "step": 224 }, { "epoch": 0.18870561923399498, "grad_norm": 0.6044927865601075, "learning_rate": 6.2849162011173195e-06, "loss": 0.4118, "step": 225 }, { "epoch": 0.18954431087503495, "grad_norm": 0.5249001960769373, "learning_rate": 6.312849162011173e-06, "loss": 0.3873, "step": 226 }, { "epoch": 0.19038300251607493, "grad_norm": 0.5769542570133145, "learning_rate": 6.340782122905028e-06, "loss": 0.4162, "step": 227 }, { "epoch": 0.1912216941571149, "grad_norm": 0.528822413295793, "learning_rate": 6.368715083798883e-06, "loss": 0.4145, "step": 228 }, { "epoch": 0.19206038579815488, "grad_norm": 0.5241532265659992, "learning_rate": 6.396648044692738e-06, "loss": 0.4061, "step": 229 }, { "epoch": 0.19289907743919485, "grad_norm": 0.520855725973544, "learning_rate": 6.424581005586593e-06, "loss": 0.398, "step": 230 }, { "epoch": 0.19373776908023482, "grad_norm": 0.5740004049451376, "learning_rate": 6.452513966480447e-06, "loss": 0.4247, "step": 231 }, { "epoch": 0.19457646072127482, "grad_norm": 0.5284335472931332, "learning_rate": 6.480446927374302e-06, "loss": 0.4394, "step": 232 }, { "epoch": 0.1954151523623148, "grad_norm": 0.47923135414940016, "learning_rate": 6.508379888268157e-06, "loss": 0.3979, "step": 233 }, { "epoch": 0.19625384400335477, "grad_norm": 0.6029394189501086, "learning_rate": 6.536312849162012e-06, "loss": 0.4212, "step": 234 }, { "epoch": 0.19709253564439475, "grad_norm": 0.6103043310363127, "learning_rate": 6.564245810055866e-06, "loss": 0.4002, "step": 235 }, { "epoch": 0.19793122728543472, "grad_norm": 0.516608432834648, "learning_rate": 6.592178770949721e-06, "loss": 0.4375, "step": 236 }, { "epoch": 0.1987699189264747, "grad_norm": 0.6061396416902631, "learning_rate": 6.620111731843576e-06, "loss": 0.4242, "step": 237 }, { "epoch": 0.19960861056751467, "grad_norm": 0.5446546985682224, "learning_rate": 6.648044692737431e-06, "loss": 0.4007, "step": 238 }, { "epoch": 0.20044730220855467, "grad_norm": 0.5518945783593493, "learning_rate": 6.675977653631286e-06, "loss": 0.44, "step": 239 }, { "epoch": 0.20128599384959464, "grad_norm": 0.5527609277225047, "learning_rate": 6.70391061452514e-06, "loss": 0.4157, "step": 240 }, { "epoch": 0.20212468549063461, "grad_norm": 0.5085124938612701, "learning_rate": 6.731843575418995e-06, "loss": 0.4239, "step": 241 }, { "epoch": 0.2029633771316746, "grad_norm": 0.5966365622404881, "learning_rate": 6.7597765363128496e-06, "loss": 0.4261, "step": 242 }, { "epoch": 0.20380206877271456, "grad_norm": 0.5030341666996009, "learning_rate": 6.787709497206705e-06, "loss": 0.4042, "step": 243 }, { "epoch": 0.20464076041375454, "grad_norm": 0.5186225406238368, "learning_rate": 6.815642458100559e-06, "loss": 0.4248, "step": 244 }, { "epoch": 0.2054794520547945, "grad_norm": 0.48245578856792315, "learning_rate": 6.843575418994414e-06, "loss": 0.4205, "step": 245 }, { "epoch": 0.2063181436958345, "grad_norm": 0.48131422500990284, "learning_rate": 6.871508379888268e-06, "loss": 0.4114, "step": 246 }, { "epoch": 0.20715683533687448, "grad_norm": 0.6114450795612186, "learning_rate": 6.899441340782124e-06, "loss": 0.4217, "step": 247 }, { "epoch": 0.20799552697791446, "grad_norm": 0.5692656126333197, "learning_rate": 6.927374301675979e-06, "loss": 0.4362, "step": 248 }, { "epoch": 0.20883421861895443, "grad_norm": 0.5212865184606266, "learning_rate": 6.9553072625698325e-06, "loss": 0.3964, "step": 249 }, { "epoch": 0.2096729102599944, "grad_norm": 0.6782151406431933, "learning_rate": 6.983240223463687e-06, "loss": 0.4274, "step": 250 }, { "epoch": 0.21051160190103438, "grad_norm": 0.5410114797880644, "learning_rate": 7.011173184357543e-06, "loss": 0.4165, "step": 251 }, { "epoch": 0.21135029354207435, "grad_norm": 0.5665976922606838, "learning_rate": 7.0391061452513975e-06, "loss": 0.4194, "step": 252 }, { "epoch": 0.21218898518311435, "grad_norm": 0.5854720225767801, "learning_rate": 7.067039106145251e-06, "loss": 0.4033, "step": 253 }, { "epoch": 0.21302767682415433, "grad_norm": 0.6980054452612185, "learning_rate": 7.094972067039106e-06, "loss": 0.4033, "step": 254 }, { "epoch": 0.2138663684651943, "grad_norm": 0.5731600920131744, "learning_rate": 7.122905027932962e-06, "loss": 0.4167, "step": 255 }, { "epoch": 0.21470506010623427, "grad_norm": 0.6141044171946929, "learning_rate": 7.150837988826816e-06, "loss": 0.408, "step": 256 }, { "epoch": 0.21554375174727425, "grad_norm": 0.6569324758379754, "learning_rate": 7.178770949720671e-06, "loss": 0.4006, "step": 257 }, { "epoch": 0.21638244338831422, "grad_norm": 0.5122465649643707, "learning_rate": 7.206703910614526e-06, "loss": 0.416, "step": 258 }, { "epoch": 0.2172211350293542, "grad_norm": 0.547990612998738, "learning_rate": 7.2346368715083805e-06, "loss": 0.391, "step": 259 }, { "epoch": 0.2180598266703942, "grad_norm": 0.6051475334897611, "learning_rate": 7.262569832402235e-06, "loss": 0.429, "step": 260 }, { "epoch": 0.21889851831143417, "grad_norm": 0.5396942772842632, "learning_rate": 7.290502793296091e-06, "loss": 0.4213, "step": 261 }, { "epoch": 0.21973720995247414, "grad_norm": 0.5501787842272241, "learning_rate": 7.318435754189945e-06, "loss": 0.4072, "step": 262 }, { "epoch": 0.22057590159351412, "grad_norm": 0.5591950550901881, "learning_rate": 7.346368715083799e-06, "loss": 0.4149, "step": 263 }, { "epoch": 0.2214145932345541, "grad_norm": 0.5227401867338511, "learning_rate": 7.374301675977654e-06, "loss": 0.4569, "step": 264 }, { "epoch": 0.22225328487559406, "grad_norm": 0.5083984324008253, "learning_rate": 7.4022346368715096e-06, "loss": 0.42, "step": 265 }, { "epoch": 0.22309197651663404, "grad_norm": 0.665697583038521, "learning_rate": 7.430167597765364e-06, "loss": 0.4339, "step": 266 }, { "epoch": 0.22393066815767404, "grad_norm": 0.4924808760086031, "learning_rate": 7.458100558659218e-06, "loss": 0.4072, "step": 267 }, { "epoch": 0.224769359798714, "grad_norm": 0.5643787458367571, "learning_rate": 7.486033519553073e-06, "loss": 0.4422, "step": 268 }, { "epoch": 0.225608051439754, "grad_norm": 0.5085318074038185, "learning_rate": 7.513966480446928e-06, "loss": 0.4019, "step": 269 }, { "epoch": 0.22644674308079396, "grad_norm": 0.5289337243758662, "learning_rate": 7.541899441340783e-06, "loss": 0.4142, "step": 270 }, { "epoch": 0.22728543472183393, "grad_norm": 0.5631501161477128, "learning_rate": 7.569832402234637e-06, "loss": 0.421, "step": 271 }, { "epoch": 0.2281241263628739, "grad_norm": 0.5693111673319953, "learning_rate": 7.597765363128492e-06, "loss": 0.4196, "step": 272 }, { "epoch": 0.22896281800391388, "grad_norm": 0.4936910340805505, "learning_rate": 7.625698324022347e-06, "loss": 0.4055, "step": 273 }, { "epoch": 0.22980150964495388, "grad_norm": 0.5116657991905827, "learning_rate": 7.653631284916202e-06, "loss": 0.4292, "step": 274 }, { "epoch": 0.23064020128599386, "grad_norm": 0.5111828117083942, "learning_rate": 7.681564245810057e-06, "loss": 0.4333, "step": 275 }, { "epoch": 0.23147889292703383, "grad_norm": 0.5349418735427913, "learning_rate": 7.709497206703911e-06, "loss": 0.3928, "step": 276 }, { "epoch": 0.2323175845680738, "grad_norm": 0.5140354674880439, "learning_rate": 7.737430167597766e-06, "loss": 0.3991, "step": 277 }, { "epoch": 0.23315627620911378, "grad_norm": 0.437869268104339, "learning_rate": 7.76536312849162e-06, "loss": 0.3931, "step": 278 }, { "epoch": 0.23399496785015375, "grad_norm": 0.5660211515457506, "learning_rate": 7.793296089385475e-06, "loss": 0.4285, "step": 279 }, { "epoch": 0.23483365949119372, "grad_norm": 0.4992136108928247, "learning_rate": 7.82122905027933e-06, "loss": 0.402, "step": 280 }, { "epoch": 0.23567235113223373, "grad_norm": 0.4439602844783769, "learning_rate": 7.849162011173185e-06, "loss": 0.4173, "step": 281 }, { "epoch": 0.2365110427732737, "grad_norm": 0.5798732918978154, "learning_rate": 7.87709497206704e-06, "loss": 0.4142, "step": 282 }, { "epoch": 0.23734973441431367, "grad_norm": 0.4609805261415577, "learning_rate": 7.905027932960894e-06, "loss": 0.4095, "step": 283 }, { "epoch": 0.23818842605535365, "grad_norm": 0.5327780257891496, "learning_rate": 7.932960893854749e-06, "loss": 0.4452, "step": 284 }, { "epoch": 0.23902711769639362, "grad_norm": 0.4565865703766281, "learning_rate": 7.960893854748604e-06, "loss": 0.3998, "step": 285 }, { "epoch": 0.2398658093374336, "grad_norm": 0.4541743771295166, "learning_rate": 7.988826815642458e-06, "loss": 0.4057, "step": 286 }, { "epoch": 0.24070450097847357, "grad_norm": 0.44250699642745084, "learning_rate": 8.016759776536313e-06, "loss": 0.3917, "step": 287 }, { "epoch": 0.24154319261951357, "grad_norm": 0.48168108982752955, "learning_rate": 8.044692737430168e-06, "loss": 0.4002, "step": 288 }, { "epoch": 0.24238188426055354, "grad_norm": 0.4964271315701034, "learning_rate": 8.072625698324023e-06, "loss": 0.4111, "step": 289 }, { "epoch": 0.24322057590159352, "grad_norm": 0.5059427407115753, "learning_rate": 8.100558659217877e-06, "loss": 0.4137, "step": 290 }, { "epoch": 0.2440592675426335, "grad_norm": 0.4919785553222444, "learning_rate": 8.128491620111732e-06, "loss": 0.4066, "step": 291 }, { "epoch": 0.24489795918367346, "grad_norm": 0.4937321742253734, "learning_rate": 8.156424581005588e-06, "loss": 0.3819, "step": 292 }, { "epoch": 0.24573665082471344, "grad_norm": 0.49090805226690526, "learning_rate": 8.184357541899443e-06, "loss": 0.3791, "step": 293 }, { "epoch": 0.2465753424657534, "grad_norm": 0.5500679077496888, "learning_rate": 8.212290502793296e-06, "loss": 0.3827, "step": 294 }, { "epoch": 0.2474140341067934, "grad_norm": 0.5332613232207686, "learning_rate": 8.24022346368715e-06, "loss": 0.3989, "step": 295 }, { "epoch": 0.24825272574783339, "grad_norm": 0.5335548148179486, "learning_rate": 8.268156424581007e-06, "loss": 0.4074, "step": 296 }, { "epoch": 0.24909141738887336, "grad_norm": 0.49534506595423106, "learning_rate": 8.296089385474862e-06, "loss": 0.4229, "step": 297 }, { "epoch": 0.24993010902991333, "grad_norm": 0.5580965227749464, "learning_rate": 8.324022346368715e-06, "loss": 0.4022, "step": 298 }, { "epoch": 0.2507688006709533, "grad_norm": 0.45926862806352226, "learning_rate": 8.35195530726257e-06, "loss": 0.3916, "step": 299 }, { "epoch": 0.2516074923119933, "grad_norm": 0.6002444012860928, "learning_rate": 8.379888268156426e-06, "loss": 0.3862, "step": 300 }, { "epoch": 0.25244618395303325, "grad_norm": 0.4814276058088228, "learning_rate": 8.40782122905028e-06, "loss": 0.4163, "step": 301 }, { "epoch": 0.25328487559407326, "grad_norm": 0.538719518186603, "learning_rate": 8.435754189944135e-06, "loss": 0.3837, "step": 302 }, { "epoch": 0.2541235672351132, "grad_norm": 0.5006828274133552, "learning_rate": 8.463687150837988e-06, "loss": 0.4051, "step": 303 }, { "epoch": 0.2549622588761532, "grad_norm": 0.46671235083965884, "learning_rate": 8.491620111731845e-06, "loss": 0.4298, "step": 304 }, { "epoch": 0.2558009505171932, "grad_norm": 0.5429655730087893, "learning_rate": 8.5195530726257e-06, "loss": 0.4029, "step": 305 }, { "epoch": 0.25663964215823315, "grad_norm": 0.5163814656730529, "learning_rate": 8.547486033519554e-06, "loss": 0.4097, "step": 306 }, { "epoch": 0.25747833379927315, "grad_norm": 0.44399924160273785, "learning_rate": 8.575418994413407e-06, "loss": 0.4016, "step": 307 }, { "epoch": 0.2583170254403131, "grad_norm": 0.4935395816689908, "learning_rate": 8.603351955307264e-06, "loss": 0.4041, "step": 308 }, { "epoch": 0.2591557170813531, "grad_norm": 0.5330123845092941, "learning_rate": 8.631284916201118e-06, "loss": 0.4135, "step": 309 }, { "epoch": 0.25999440872239304, "grad_norm": 0.5414455845996099, "learning_rate": 8.659217877094973e-06, "loss": 0.4073, "step": 310 }, { "epoch": 0.26083310036343305, "grad_norm": 0.5194726789324334, "learning_rate": 8.687150837988828e-06, "loss": 0.4114, "step": 311 }, { "epoch": 0.26167179200447305, "grad_norm": 0.633584976215161, "learning_rate": 8.715083798882683e-06, "loss": 0.414, "step": 312 }, { "epoch": 0.262510483645513, "grad_norm": 0.49730068009525724, "learning_rate": 8.743016759776537e-06, "loss": 0.453, "step": 313 }, { "epoch": 0.263349175286553, "grad_norm": 0.6188803380481881, "learning_rate": 8.770949720670392e-06, "loss": 0.4147, "step": 314 }, { "epoch": 0.26418786692759294, "grad_norm": 0.5550634800981278, "learning_rate": 8.798882681564247e-06, "loss": 0.4192, "step": 315 }, { "epoch": 0.26502655856863294, "grad_norm": 0.5159665724517197, "learning_rate": 8.826815642458101e-06, "loss": 0.3888, "step": 316 }, { "epoch": 0.2658652502096729, "grad_norm": 0.5007753381418846, "learning_rate": 8.854748603351956e-06, "loss": 0.4348, "step": 317 }, { "epoch": 0.2667039418507129, "grad_norm": 0.6213087610910445, "learning_rate": 8.88268156424581e-06, "loss": 0.4175, "step": 318 }, { "epoch": 0.2675426334917529, "grad_norm": 0.5325416214402273, "learning_rate": 8.910614525139666e-06, "loss": 0.4049, "step": 319 }, { "epoch": 0.26838132513279284, "grad_norm": 0.5723224869176855, "learning_rate": 8.93854748603352e-06, "loss": 0.4151, "step": 320 }, { "epoch": 0.26922001677383284, "grad_norm": 0.5188838768959108, "learning_rate": 8.966480446927375e-06, "loss": 0.3885, "step": 321 }, { "epoch": 0.2700587084148728, "grad_norm": 0.5829351233521113, "learning_rate": 8.99441340782123e-06, "loss": 0.4146, "step": 322 }, { "epoch": 0.2708974000559128, "grad_norm": 0.5402637882497822, "learning_rate": 9.022346368715084e-06, "loss": 0.406, "step": 323 }, { "epoch": 0.27173609169695273, "grad_norm": 0.5307727602477532, "learning_rate": 9.050279329608939e-06, "loss": 0.4192, "step": 324 }, { "epoch": 0.27257478333799273, "grad_norm": 0.5117066737727363, "learning_rate": 9.078212290502794e-06, "loss": 0.3938, "step": 325 }, { "epoch": 0.27341347497903273, "grad_norm": 0.5047918642811664, "learning_rate": 9.106145251396648e-06, "loss": 0.4263, "step": 326 }, { "epoch": 0.2742521666200727, "grad_norm": 0.4713672095178813, "learning_rate": 9.134078212290503e-06, "loss": 0.393, "step": 327 }, { "epoch": 0.2750908582611127, "grad_norm": 0.5335867584732679, "learning_rate": 9.162011173184358e-06, "loss": 0.4227, "step": 328 }, { "epoch": 0.2759295499021526, "grad_norm": 0.48928132924311896, "learning_rate": 9.189944134078213e-06, "loss": 0.4221, "step": 329 }, { "epoch": 0.27676824154319263, "grad_norm": 0.5175693099209172, "learning_rate": 9.217877094972067e-06, "loss": 0.4174, "step": 330 }, { "epoch": 0.2776069331842326, "grad_norm": 0.5247425910568847, "learning_rate": 9.245810055865922e-06, "loss": 0.3849, "step": 331 }, { "epoch": 0.2784456248252726, "grad_norm": 0.527228872037385, "learning_rate": 9.273743016759777e-06, "loss": 0.4119, "step": 332 }, { "epoch": 0.2792843164663126, "grad_norm": 0.4893528446136709, "learning_rate": 9.301675977653633e-06, "loss": 0.4202, "step": 333 }, { "epoch": 0.2801230081073525, "grad_norm": 0.5066360206086963, "learning_rate": 9.329608938547486e-06, "loss": 0.4151, "step": 334 }, { "epoch": 0.2809616997483925, "grad_norm": 0.5245313450525231, "learning_rate": 9.35754189944134e-06, "loss": 0.4215, "step": 335 }, { "epoch": 0.28180039138943247, "grad_norm": 0.49868626325793947, "learning_rate": 9.385474860335197e-06, "loss": 0.4042, "step": 336 }, { "epoch": 0.28263908303047247, "grad_norm": 0.5099909643052681, "learning_rate": 9.413407821229052e-06, "loss": 0.3874, "step": 337 }, { "epoch": 0.2834777746715124, "grad_norm": 0.4835796020056084, "learning_rate": 9.441340782122905e-06, "loss": 0.42, "step": 338 }, { "epoch": 0.2843164663125524, "grad_norm": 0.49774007953567684, "learning_rate": 9.46927374301676e-06, "loss": 0.4174, "step": 339 }, { "epoch": 0.2851551579535924, "grad_norm": 0.5640349857821649, "learning_rate": 9.497206703910616e-06, "loss": 0.3924, "step": 340 }, { "epoch": 0.28599384959463237, "grad_norm": 0.5004932831788864, "learning_rate": 9.52513966480447e-06, "loss": 0.4038, "step": 341 }, { "epoch": 0.28683254123567237, "grad_norm": 0.46805345203575655, "learning_rate": 9.553072625698325e-06, "loss": 0.3978, "step": 342 }, { "epoch": 0.2876712328767123, "grad_norm": 0.4694056165007625, "learning_rate": 9.581005586592178e-06, "loss": 0.3918, "step": 343 }, { "epoch": 0.2885099245177523, "grad_norm": 0.5313312335675071, "learning_rate": 9.608938547486035e-06, "loss": 0.3674, "step": 344 }, { "epoch": 0.28934861615879226, "grad_norm": 0.4825208275877568, "learning_rate": 9.63687150837989e-06, "loss": 0.3988, "step": 345 }, { "epoch": 0.29018730779983226, "grad_norm": 0.5801468899331383, "learning_rate": 9.664804469273744e-06, "loss": 0.4008, "step": 346 }, { "epoch": 0.29102599944087226, "grad_norm": 0.5168445234502261, "learning_rate": 9.692737430167597e-06, "loss": 0.4061, "step": 347 }, { "epoch": 0.2918646910819122, "grad_norm": 0.5354038000139189, "learning_rate": 9.720670391061454e-06, "loss": 0.4115, "step": 348 }, { "epoch": 0.2927033827229522, "grad_norm": 0.6071419407446648, "learning_rate": 9.748603351955308e-06, "loss": 0.4188, "step": 349 }, { "epoch": 0.29354207436399216, "grad_norm": 0.4963632538639926, "learning_rate": 9.776536312849163e-06, "loss": 0.3954, "step": 350 }, { "epoch": 0.29438076600503216, "grad_norm": 0.5660210143469818, "learning_rate": 9.804469273743018e-06, "loss": 0.387, "step": 351 }, { "epoch": 0.2952194576460721, "grad_norm": 0.54580880475279, "learning_rate": 9.832402234636873e-06, "loss": 0.3936, "step": 352 }, { "epoch": 0.2960581492871121, "grad_norm": 0.4514155063037075, "learning_rate": 9.860335195530727e-06, "loss": 0.3855, "step": 353 }, { "epoch": 0.2968968409281521, "grad_norm": 0.5450827546920516, "learning_rate": 9.888268156424582e-06, "loss": 0.3819, "step": 354 }, { "epoch": 0.29773553256919205, "grad_norm": 0.5519005750626623, "learning_rate": 9.916201117318437e-06, "loss": 0.3933, "step": 355 }, { "epoch": 0.29857422421023205, "grad_norm": 0.5295967119088589, "learning_rate": 9.944134078212291e-06, "loss": 0.4003, "step": 356 }, { "epoch": 0.299412915851272, "grad_norm": 0.48791498045852016, "learning_rate": 9.972067039106146e-06, "loss": 0.3687, "step": 357 }, { "epoch": 0.300251607492312, "grad_norm": 0.533844239504554, "learning_rate": 1e-05, "loss": 0.39, "step": 358 }, { "epoch": 0.30109029913335195, "grad_norm": 0.4961973173195682, "learning_rate": 9.999997617309464e-06, "loss": 0.4116, "step": 359 }, { "epoch": 0.30192899077439195, "grad_norm": 0.5544946789435699, "learning_rate": 9.999990469240122e-06, "loss": 0.423, "step": 360 }, { "epoch": 0.30276768241543195, "grad_norm": 0.494607134233082, "learning_rate": 9.99997855579879e-06, "loss": 0.4056, "step": 361 }, { "epoch": 0.3036063740564719, "grad_norm": 0.5267440977813929, "learning_rate": 9.999961876996822e-06, "loss": 0.3987, "step": 362 }, { "epoch": 0.3044450656975119, "grad_norm": 0.478193687306532, "learning_rate": 9.999940432850114e-06, "loss": 0.3719, "step": 363 }, { "epoch": 0.30528375733855184, "grad_norm": 0.5414997229842865, "learning_rate": 9.999914223379103e-06, "loss": 0.4051, "step": 364 }, { "epoch": 0.30612244897959184, "grad_norm": 0.5240957634017788, "learning_rate": 9.999883248608768e-06, "loss": 0.4087, "step": 365 }, { "epoch": 0.3069611406206318, "grad_norm": 0.5055525416769987, "learning_rate": 9.999847508568633e-06, "loss": 0.3858, "step": 366 }, { "epoch": 0.3077998322616718, "grad_norm": 0.5552697886898543, "learning_rate": 9.999807003292759e-06, "loss": 0.3892, "step": 367 }, { "epoch": 0.3086385239027118, "grad_norm": 0.49694181013787986, "learning_rate": 9.99976173281975e-06, "loss": 0.404, "step": 368 }, { "epoch": 0.30947721554375174, "grad_norm": 0.5300861274990346, "learning_rate": 9.999711697192755e-06, "loss": 0.396, "step": 369 }, { "epoch": 0.31031590718479174, "grad_norm": 0.43980834072914304, "learning_rate": 9.999656896459459e-06, "loss": 0.3908, "step": 370 }, { "epoch": 0.3111545988258317, "grad_norm": 0.49941264411519126, "learning_rate": 9.999597330672093e-06, "loss": 0.3914, "step": 371 }, { "epoch": 0.3119932904668717, "grad_norm": 0.4703474497539425, "learning_rate": 9.999532999887428e-06, "loss": 0.413, "step": 372 }, { "epoch": 0.31283198210791163, "grad_norm": 0.5834047844457375, "learning_rate": 9.999463904166773e-06, "loss": 0.3944, "step": 373 }, { "epoch": 0.31367067374895163, "grad_norm": 0.47724634299279034, "learning_rate": 9.999390043575985e-06, "loss": 0.3902, "step": 374 }, { "epoch": 0.31450936538999164, "grad_norm": 0.4792335408670557, "learning_rate": 9.999311418185456e-06, "loss": 0.4104, "step": 375 }, { "epoch": 0.3153480570310316, "grad_norm": 0.542418594923344, "learning_rate": 9.999228028070125e-06, "loss": 0.4048, "step": 376 }, { "epoch": 0.3161867486720716, "grad_norm": 0.557185407702837, "learning_rate": 9.999139873309467e-06, "loss": 0.4031, "step": 377 }, { "epoch": 0.31702544031311153, "grad_norm": 0.5340415424985041, "learning_rate": 9.9990469539875e-06, "loss": 0.3729, "step": 378 }, { "epoch": 0.31786413195415153, "grad_norm": 0.4711849917429446, "learning_rate": 9.998949270192786e-06, "loss": 0.3947, "step": 379 }, { "epoch": 0.3187028235951915, "grad_norm": 0.5808737067821543, "learning_rate": 9.998846822018422e-06, "loss": 0.3899, "step": 380 }, { "epoch": 0.3195415152362315, "grad_norm": 0.4815624627401554, "learning_rate": 9.998739609562051e-06, "loss": 0.3931, "step": 381 }, { "epoch": 0.3203802068772715, "grad_norm": 0.5215809928856362, "learning_rate": 9.998627632925852e-06, "loss": 0.3864, "step": 382 }, { "epoch": 0.3212188985183114, "grad_norm": 0.4975366328139933, "learning_rate": 9.99851089221655e-06, "loss": 0.3699, "step": 383 }, { "epoch": 0.3220575901593514, "grad_norm": 0.48550464588773, "learning_rate": 9.998389387545406e-06, "loss": 0.3957, "step": 384 }, { "epoch": 0.32289628180039137, "grad_norm": 0.5110591100758625, "learning_rate": 9.998263119028225e-06, "loss": 0.3966, "step": 385 }, { "epoch": 0.3237349734414314, "grad_norm": 0.45874090050999744, "learning_rate": 9.998132086785348e-06, "loss": 0.4047, "step": 386 }, { "epoch": 0.3245736650824713, "grad_norm": 0.5140561553045164, "learning_rate": 9.99799629094166e-06, "loss": 0.4374, "step": 387 }, { "epoch": 0.3254123567235113, "grad_norm": 0.4546077664165264, "learning_rate": 9.997855731626587e-06, "loss": 0.4019, "step": 388 }, { "epoch": 0.3262510483645513, "grad_norm": 0.4802811381798769, "learning_rate": 9.997710408974088e-06, "loss": 0.385, "step": 389 }, { "epoch": 0.32708974000559127, "grad_norm": 0.46462281752967816, "learning_rate": 9.997560323122672e-06, "loss": 0.3966, "step": 390 }, { "epoch": 0.32792843164663127, "grad_norm": 0.49253783849033106, "learning_rate": 9.997405474215379e-06, "loss": 0.4045, "step": 391 }, { "epoch": 0.3287671232876712, "grad_norm": 0.45284574583131004, "learning_rate": 9.997245862399791e-06, "loss": 0.3762, "step": 392 }, { "epoch": 0.3296058149287112, "grad_norm": 0.4934653829365337, "learning_rate": 9.997081487828032e-06, "loss": 0.4026, "step": 393 }, { "epoch": 0.33044450656975116, "grad_norm": 0.46710311596503845, "learning_rate": 9.996912350656761e-06, "loss": 0.4118, "step": 394 }, { "epoch": 0.33128319821079116, "grad_norm": 0.4975046131170336, "learning_rate": 9.996738451047185e-06, "loss": 0.4046, "step": 395 }, { "epoch": 0.33212188985183116, "grad_norm": 0.46114578889645647, "learning_rate": 9.996559789165037e-06, "loss": 0.3807, "step": 396 }, { "epoch": 0.3329605814928711, "grad_norm": 0.4934746803461326, "learning_rate": 9.996376365180597e-06, "loss": 0.3819, "step": 397 }, { "epoch": 0.3337992731339111, "grad_norm": 0.5457613387636309, "learning_rate": 9.996188179268684e-06, "loss": 0.404, "step": 398 }, { "epoch": 0.33463796477495106, "grad_norm": 0.4195870201634469, "learning_rate": 9.99599523160865e-06, "loss": 0.3733, "step": 399 }, { "epoch": 0.33547665641599106, "grad_norm": 0.4678729634465343, "learning_rate": 9.995797522384394e-06, "loss": 0.3988, "step": 400 }, { "epoch": 0.336315348057031, "grad_norm": 0.4715372101815983, "learning_rate": 9.995595051784344e-06, "loss": 0.3772, "step": 401 }, { "epoch": 0.337154039698071, "grad_norm": 0.4406744281348178, "learning_rate": 9.99538782000147e-06, "loss": 0.3776, "step": 402 }, { "epoch": 0.337992731339111, "grad_norm": 0.5232565270726035, "learning_rate": 9.995175827233281e-06, "loss": 0.3919, "step": 403 }, { "epoch": 0.33883142298015095, "grad_norm": 0.4823989476162481, "learning_rate": 9.994959073681822e-06, "loss": 0.4172, "step": 404 }, { "epoch": 0.33967011462119095, "grad_norm": 0.4605701239841007, "learning_rate": 9.994737559553676e-06, "loss": 0.3942, "step": 405 }, { "epoch": 0.3405088062622309, "grad_norm": 0.47860028405543126, "learning_rate": 9.99451128505996e-06, "loss": 0.3868, "step": 406 }, { "epoch": 0.3413474979032709, "grad_norm": 0.5287301442810165, "learning_rate": 9.994280250416335e-06, "loss": 0.4113, "step": 407 }, { "epoch": 0.34218618954431085, "grad_norm": 0.5173633690061613, "learning_rate": 9.994044455842993e-06, "loss": 0.4113, "step": 408 }, { "epoch": 0.34302488118535085, "grad_norm": 0.5296335495215186, "learning_rate": 9.993803901564663e-06, "loss": 0.3875, "step": 409 }, { "epoch": 0.34386357282639085, "grad_norm": 0.4709604829214305, "learning_rate": 9.993558587810612e-06, "loss": 0.3864, "step": 410 }, { "epoch": 0.3447022644674308, "grad_norm": 0.5027625460034609, "learning_rate": 9.993308514814647e-06, "loss": 0.3808, "step": 411 }, { "epoch": 0.3455409561084708, "grad_norm": 0.532041190631831, "learning_rate": 9.9930536828151e-06, "loss": 0.4135, "step": 412 }, { "epoch": 0.34637964774951074, "grad_norm": 0.4822983598901762, "learning_rate": 9.992794092054847e-06, "loss": 0.3893, "step": 413 }, { "epoch": 0.34721833939055075, "grad_norm": 0.5454511548425307, "learning_rate": 9.9925297427813e-06, "loss": 0.398, "step": 414 }, { "epoch": 0.3480570310315907, "grad_norm": 0.5433226728950664, "learning_rate": 9.992260635246404e-06, "loss": 0.4271, "step": 415 }, { "epoch": 0.3488957226726307, "grad_norm": 0.48225572804030925, "learning_rate": 9.991986769706637e-06, "loss": 0.3805, "step": 416 }, { "epoch": 0.3497344143136707, "grad_norm": 0.564333339405087, "learning_rate": 9.991708146423014e-06, "loss": 0.3958, "step": 417 }, { "epoch": 0.35057310595471064, "grad_norm": 0.5200783975824043, "learning_rate": 9.991424765661087e-06, "loss": 0.367, "step": 418 }, { "epoch": 0.35141179759575064, "grad_norm": 0.4368739243480999, "learning_rate": 9.991136627690937e-06, "loss": 0.3964, "step": 419 }, { "epoch": 0.3522504892367906, "grad_norm": 0.5547488288767451, "learning_rate": 9.990843732787182e-06, "loss": 0.4141, "step": 420 }, { "epoch": 0.3530891808778306, "grad_norm": 0.469974670604427, "learning_rate": 9.990546081228972e-06, "loss": 0.4059, "step": 421 }, { "epoch": 0.35392787251887053, "grad_norm": 0.4919962613646487, "learning_rate": 9.99024367329999e-06, "loss": 0.3955, "step": 422 }, { "epoch": 0.35476656415991054, "grad_norm": 0.5692860704934386, "learning_rate": 9.98993650928846e-06, "loss": 0.4001, "step": 423 }, { "epoch": 0.35560525580095054, "grad_norm": 0.5156136395501019, "learning_rate": 9.989624589487129e-06, "loss": 0.4028, "step": 424 }, { "epoch": 0.3564439474419905, "grad_norm": 0.534018966587883, "learning_rate": 9.98930791419328e-06, "loss": 0.3715, "step": 425 }, { "epoch": 0.3572826390830305, "grad_norm": 0.5622483684595708, "learning_rate": 9.98898648370873e-06, "loss": 0.3916, "step": 426 }, { "epoch": 0.35812133072407043, "grad_norm": 0.4483591964259997, "learning_rate": 9.988660298339822e-06, "loss": 0.3729, "step": 427 }, { "epoch": 0.35896002236511043, "grad_norm": 0.5329538675264175, "learning_rate": 9.988329358397445e-06, "loss": 0.3954, "step": 428 }, { "epoch": 0.35979871400615043, "grad_norm": 0.5405353945433858, "learning_rate": 9.987993664197e-06, "loss": 0.3875, "step": 429 }, { "epoch": 0.3606374056471904, "grad_norm": 0.5325107507737306, "learning_rate": 9.987653216058436e-06, "loss": 0.3821, "step": 430 }, { "epoch": 0.3614760972882304, "grad_norm": 0.6049361397075185, "learning_rate": 9.987308014306222e-06, "loss": 0.4011, "step": 431 }, { "epoch": 0.3623147889292703, "grad_norm": 0.4669626165712, "learning_rate": 9.986958059269365e-06, "loss": 0.3751, "step": 432 }, { "epoch": 0.3631534805703103, "grad_norm": 0.5311408147978195, "learning_rate": 9.986603351281397e-06, "loss": 0.3973, "step": 433 }, { "epoch": 0.3639921722113503, "grad_norm": 0.49862643029331644, "learning_rate": 9.986243890680381e-06, "loss": 0.3835, "step": 434 }, { "epoch": 0.3648308638523903, "grad_norm": 0.49907180966513376, "learning_rate": 9.98587967780891e-06, "loss": 0.3752, "step": 435 }, { "epoch": 0.3656695554934303, "grad_norm": 0.5778473850299353, "learning_rate": 9.985510713014109e-06, "loss": 0.4083, "step": 436 }, { "epoch": 0.3665082471344702, "grad_norm": 0.48383453377836405, "learning_rate": 9.985136996647628e-06, "loss": 0.3998, "step": 437 }, { "epoch": 0.3673469387755102, "grad_norm": 0.5137949918056467, "learning_rate": 9.984758529065647e-06, "loss": 0.3877, "step": 438 }, { "epoch": 0.36818563041655017, "grad_norm": 0.4386651397429113, "learning_rate": 9.984375310628876e-06, "loss": 0.4086, "step": 439 }, { "epoch": 0.36902432205759017, "grad_norm": 0.4384921727579252, "learning_rate": 9.98398734170255e-06, "loss": 0.3932, "step": 440 }, { "epoch": 0.3698630136986301, "grad_norm": 0.47879696617015516, "learning_rate": 9.983594622656434e-06, "loss": 0.4318, "step": 441 }, { "epoch": 0.3707017053396701, "grad_norm": 0.43913988872069004, "learning_rate": 9.983197153864817e-06, "loss": 0.4063, "step": 442 }, { "epoch": 0.3715403969807101, "grad_norm": 0.4789504728494097, "learning_rate": 9.98279493570652e-06, "loss": 0.3864, "step": 443 }, { "epoch": 0.37237908862175007, "grad_norm": 0.420345526324353, "learning_rate": 9.982387968564884e-06, "loss": 0.3996, "step": 444 }, { "epoch": 0.37321778026279007, "grad_norm": 0.4884550308222254, "learning_rate": 9.981976252827783e-06, "loss": 0.3875, "step": 445 }, { "epoch": 0.37405647190383, "grad_norm": 0.4967179208584988, "learning_rate": 9.981559788887612e-06, "loss": 0.3635, "step": 446 }, { "epoch": 0.37489516354487, "grad_norm": 0.438597965964732, "learning_rate": 9.981138577141293e-06, "loss": 0.3859, "step": 447 }, { "epoch": 0.37573385518590996, "grad_norm": 0.48374101943376907, "learning_rate": 9.980712617990273e-06, "loss": 0.3953, "step": 448 }, { "epoch": 0.37657254682694996, "grad_norm": 0.48885381401134786, "learning_rate": 9.980281911840525e-06, "loss": 0.3825, "step": 449 }, { "epoch": 0.37741123846798996, "grad_norm": 0.5009012403437577, "learning_rate": 9.979846459102542e-06, "loss": 0.4072, "step": 450 }, { "epoch": 0.3782499301090299, "grad_norm": 0.5248334814497818, "learning_rate": 9.979406260191345e-06, "loss": 0.4082, "step": 451 }, { "epoch": 0.3790886217500699, "grad_norm": 0.5124385510754075, "learning_rate": 9.978961315526477e-06, "loss": 0.392, "step": 452 }, { "epoch": 0.37992731339110986, "grad_norm": 0.4826912442799867, "learning_rate": 9.978511625532003e-06, "loss": 0.3851, "step": 453 }, { "epoch": 0.38076600503214986, "grad_norm": 0.4987193016061711, "learning_rate": 9.978057190636516e-06, "loss": 0.3787, "step": 454 }, { "epoch": 0.3816046966731898, "grad_norm": 0.5105411073274652, "learning_rate": 9.977598011273121e-06, "loss": 0.3889, "step": 455 }, { "epoch": 0.3824433883142298, "grad_norm": 0.5037425015841077, "learning_rate": 9.977134087879458e-06, "loss": 0.4064, "step": 456 }, { "epoch": 0.3832820799552698, "grad_norm": 0.5237327489906649, "learning_rate": 9.976665420897676e-06, "loss": 0.395, "step": 457 }, { "epoch": 0.38412077159630975, "grad_norm": 0.4162710896243833, "learning_rate": 9.97619201077445e-06, "loss": 0.393, "step": 458 }, { "epoch": 0.38495946323734975, "grad_norm": 0.4646725052771407, "learning_rate": 9.975713857960979e-06, "loss": 0.3979, "step": 459 }, { "epoch": 0.3857981548783897, "grad_norm": 0.44831556002517686, "learning_rate": 9.975230962912978e-06, "loss": 0.3986, "step": 460 }, { "epoch": 0.3866368465194297, "grad_norm": 0.42989039660243056, "learning_rate": 9.974743326090682e-06, "loss": 0.3885, "step": 461 }, { "epoch": 0.38747553816046965, "grad_norm": 0.4475404105534784, "learning_rate": 9.974250947958848e-06, "loss": 0.3863, "step": 462 }, { "epoch": 0.38831422980150965, "grad_norm": 0.4369460229940806, "learning_rate": 9.973753828986747e-06, "loss": 0.3879, "step": 463 }, { "epoch": 0.38915292144254965, "grad_norm": 0.48576834397832, "learning_rate": 9.973251969648174e-06, "loss": 0.3746, "step": 464 }, { "epoch": 0.3899916130835896, "grad_norm": 0.4632612138392902, "learning_rate": 9.972745370421436e-06, "loss": 0.3981, "step": 465 }, { "epoch": 0.3908303047246296, "grad_norm": 0.477264825075914, "learning_rate": 9.972234031789366e-06, "loss": 0.4048, "step": 466 }, { "epoch": 0.39166899636566954, "grad_norm": 0.5183492920600838, "learning_rate": 9.971717954239303e-06, "loss": 0.3967, "step": 467 }, { "epoch": 0.39250768800670954, "grad_norm": 0.46230621073196765, "learning_rate": 9.971197138263111e-06, "loss": 0.3751, "step": 468 }, { "epoch": 0.3933463796477495, "grad_norm": 0.5186953357901178, "learning_rate": 9.970671584357168e-06, "loss": 0.4091, "step": 469 }, { "epoch": 0.3941850712887895, "grad_norm": 0.4910739859676086, "learning_rate": 9.970141293022364e-06, "loss": 0.3819, "step": 470 }, { "epoch": 0.3950237629298295, "grad_norm": 0.5149783756263244, "learning_rate": 9.96960626476411e-06, "loss": 0.414, "step": 471 }, { "epoch": 0.39586245457086944, "grad_norm": 0.476920382678208, "learning_rate": 9.969066500092327e-06, "loss": 0.406, "step": 472 }, { "epoch": 0.39670114621190944, "grad_norm": 0.4568121248388817, "learning_rate": 9.968521999521455e-06, "loss": 0.4072, "step": 473 }, { "epoch": 0.3975398378529494, "grad_norm": 0.5409787745872683, "learning_rate": 9.967972763570439e-06, "loss": 0.4121, "step": 474 }, { "epoch": 0.3983785294939894, "grad_norm": 0.4802096506433431, "learning_rate": 9.967418792762746e-06, "loss": 0.3678, "step": 475 }, { "epoch": 0.39921722113502933, "grad_norm": 0.5255055823637681, "learning_rate": 9.966860087626355e-06, "loss": 0.3825, "step": 476 }, { "epoch": 0.40005591277606933, "grad_norm": 0.4521594367800063, "learning_rate": 9.966296648693749e-06, "loss": 0.4066, "step": 477 }, { "epoch": 0.40089460441710933, "grad_norm": 0.48223399288425545, "learning_rate": 9.965728476501932e-06, "loss": 0.3939, "step": 478 }, { "epoch": 0.4017332960581493, "grad_norm": 0.42216368310814506, "learning_rate": 9.965155571592415e-06, "loss": 0.3896, "step": 479 }, { "epoch": 0.4025719876991893, "grad_norm": 0.4672292572230476, "learning_rate": 9.964577934511218e-06, "loss": 0.3929, "step": 480 }, { "epoch": 0.40341067934022923, "grad_norm": 0.4643419129685461, "learning_rate": 9.963995565808876e-06, "loss": 0.4007, "step": 481 }, { "epoch": 0.40424937098126923, "grad_norm": 0.46468655110242735, "learning_rate": 9.963408466040427e-06, "loss": 0.425, "step": 482 }, { "epoch": 0.4050880626223092, "grad_norm": 0.4508061043920571, "learning_rate": 9.962816635765426e-06, "loss": 0.3965, "step": 483 }, { "epoch": 0.4059267542633492, "grad_norm": 0.42868416623152805, "learning_rate": 9.962220075547931e-06, "loss": 0.3724, "step": 484 }, { "epoch": 0.4067654459043892, "grad_norm": 0.5066638475519756, "learning_rate": 9.961618785956509e-06, "loss": 0.3969, "step": 485 }, { "epoch": 0.4076041375454291, "grad_norm": 0.4743790138331822, "learning_rate": 9.961012767564233e-06, "loss": 0.3819, "step": 486 }, { "epoch": 0.4084428291864691, "grad_norm": 0.4697543342736762, "learning_rate": 9.96040202094869e-06, "loss": 0.4158, "step": 487 }, { "epoch": 0.40928152082750907, "grad_norm": 0.49377591021275935, "learning_rate": 9.959786546691962e-06, "loss": 0.4041, "step": 488 }, { "epoch": 0.4101202124685491, "grad_norm": 0.5073591986434035, "learning_rate": 9.959166345380644e-06, "loss": 0.3859, "step": 489 }, { "epoch": 0.410958904109589, "grad_norm": 0.4140965745439345, "learning_rate": 9.958541417605839e-06, "loss": 0.3884, "step": 490 }, { "epoch": 0.411797595750629, "grad_norm": 0.5404027859929834, "learning_rate": 9.957911763963145e-06, "loss": 0.3985, "step": 491 }, { "epoch": 0.412636287391669, "grad_norm": 0.458759969551825, "learning_rate": 9.957277385052676e-06, "loss": 0.3818, "step": 492 }, { "epoch": 0.41347497903270897, "grad_norm": 0.5209109361727281, "learning_rate": 9.95663828147904e-06, "loss": 0.436, "step": 493 }, { "epoch": 0.41431367067374897, "grad_norm": 0.47707272179357574, "learning_rate": 9.955994453851352e-06, "loss": 0.3735, "step": 494 }, { "epoch": 0.4151523623147889, "grad_norm": 0.516688723763339, "learning_rate": 9.955345902783229e-06, "loss": 0.392, "step": 495 }, { "epoch": 0.4159910539558289, "grad_norm": 0.5432658922374286, "learning_rate": 9.95469262889279e-06, "loss": 0.3966, "step": 496 }, { "epoch": 0.41682974559686886, "grad_norm": 0.5548629233293001, "learning_rate": 9.954034632802653e-06, "loss": 0.3838, "step": 497 }, { "epoch": 0.41766843723790886, "grad_norm": 0.5727135993464891, "learning_rate": 9.95337191513994e-06, "loss": 0.4003, "step": 498 }, { "epoch": 0.41850712887894886, "grad_norm": 0.4985942458712277, "learning_rate": 9.952704476536272e-06, "loss": 0.3735, "step": 499 }, { "epoch": 0.4193458205199888, "grad_norm": 0.5491113234516056, "learning_rate": 9.952032317627767e-06, "loss": 0.4054, "step": 500 }, { "epoch": 0.4201845121610288, "grad_norm": 0.4803377248906698, "learning_rate": 9.951355439055045e-06, "loss": 0.3739, "step": 501 }, { "epoch": 0.42102320380206876, "grad_norm": 0.6687362407605851, "learning_rate": 9.950673841463223e-06, "loss": 0.3992, "step": 502 }, { "epoch": 0.42186189544310876, "grad_norm": 0.4386561726617932, "learning_rate": 9.949987525501914e-06, "loss": 0.3817, "step": 503 }, { "epoch": 0.4227005870841487, "grad_norm": 0.4135488841011375, "learning_rate": 9.949296491825231e-06, "loss": 0.3854, "step": 504 }, { "epoch": 0.4235392787251887, "grad_norm": 0.581991590245601, "learning_rate": 9.94860074109178e-06, "loss": 0.3995, "step": 505 }, { "epoch": 0.4243779703662287, "grad_norm": 0.45009429899704456, "learning_rate": 9.947900273964669e-06, "loss": 0.4004, "step": 506 }, { "epoch": 0.42521666200726865, "grad_norm": 0.5517943738935336, "learning_rate": 9.94719509111149e-06, "loss": 0.3881, "step": 507 }, { "epoch": 0.42605535364830865, "grad_norm": 0.4662857255500653, "learning_rate": 9.94648519320434e-06, "loss": 0.3878, "step": 508 }, { "epoch": 0.4268940452893486, "grad_norm": 0.45669954403958996, "learning_rate": 9.945770580919805e-06, "loss": 0.3742, "step": 509 }, { "epoch": 0.4277327369303886, "grad_norm": 0.6326260409641961, "learning_rate": 9.945051254938965e-06, "loss": 0.3957, "step": 510 }, { "epoch": 0.42857142857142855, "grad_norm": 0.4650692806775579, "learning_rate": 9.944327215947394e-06, "loss": 0.3906, "step": 511 }, { "epoch": 0.42941012021246855, "grad_norm": 0.4337991394087463, "learning_rate": 9.943598464635153e-06, "loss": 0.3928, "step": 512 }, { "epoch": 0.43024881185350855, "grad_norm": 0.4872451050562082, "learning_rate": 9.9428650016968e-06, "loss": 0.4133, "step": 513 }, { "epoch": 0.4310875034945485, "grad_norm": 0.5453538244017161, "learning_rate": 9.94212682783138e-06, "loss": 0.3636, "step": 514 }, { "epoch": 0.4319261951355885, "grad_norm": 0.47689903388827043, "learning_rate": 9.941383943742429e-06, "loss": 0.388, "step": 515 }, { "epoch": 0.43276488677662844, "grad_norm": 0.5249280571853444, "learning_rate": 9.940636350137973e-06, "loss": 0.3719, "step": 516 }, { "epoch": 0.43360357841766844, "grad_norm": 0.5523706677021881, "learning_rate": 9.939884047730526e-06, "loss": 0.3892, "step": 517 }, { "epoch": 0.4344422700587084, "grad_norm": 0.5154256216043663, "learning_rate": 9.939127037237088e-06, "loss": 0.3971, "step": 518 }, { "epoch": 0.4352809616997484, "grad_norm": 0.5304629216917536, "learning_rate": 9.938365319379149e-06, "loss": 0.3986, "step": 519 }, { "epoch": 0.4361196533407884, "grad_norm": 0.5891171225943925, "learning_rate": 9.937598894882683e-06, "loss": 0.4221, "step": 520 }, { "epoch": 0.43695834498182834, "grad_norm": 0.4906009628319537, "learning_rate": 9.936827764478152e-06, "loss": 0.3679, "step": 521 }, { "epoch": 0.43779703662286834, "grad_norm": 0.511723151174949, "learning_rate": 9.936051928900502e-06, "loss": 0.3983, "step": 522 }, { "epoch": 0.4386357282639083, "grad_norm": 0.49147290799501453, "learning_rate": 9.93527138888916e-06, "loss": 0.3897, "step": 523 }, { "epoch": 0.4394744199049483, "grad_norm": 0.4937163579737561, "learning_rate": 9.934486145188047e-06, "loss": 0.4032, "step": 524 }, { "epoch": 0.44031311154598823, "grad_norm": 0.494176853330681, "learning_rate": 9.933696198545553e-06, "loss": 0.3896, "step": 525 }, { "epoch": 0.44115180318702824, "grad_norm": 0.48132418060574766, "learning_rate": 9.932901549714564e-06, "loss": 0.3826, "step": 526 }, { "epoch": 0.44199049482806824, "grad_norm": 0.4304712037727493, "learning_rate": 9.932102199452435e-06, "loss": 0.3663, "step": 527 }, { "epoch": 0.4428291864691082, "grad_norm": 0.5022351292944798, "learning_rate": 9.93129814852101e-06, "loss": 0.4198, "step": 528 }, { "epoch": 0.4436678781101482, "grad_norm": 0.4199043738402027, "learning_rate": 9.930489397686613e-06, "loss": 0.3719, "step": 529 }, { "epoch": 0.44450656975118813, "grad_norm": 0.46604387014043175, "learning_rate": 9.929675947720042e-06, "loss": 0.3851, "step": 530 }, { "epoch": 0.44534526139222813, "grad_norm": 0.4873879567965408, "learning_rate": 9.928857799396582e-06, "loss": 0.3877, "step": 531 }, { "epoch": 0.4461839530332681, "grad_norm": 0.5314992508184969, "learning_rate": 9.928034953495982e-06, "loss": 0.4089, "step": 532 }, { "epoch": 0.4470226446743081, "grad_norm": 0.4584407483047496, "learning_rate": 9.927207410802483e-06, "loss": 0.4, "step": 533 }, { "epoch": 0.4478613363153481, "grad_norm": 0.44075684651802094, "learning_rate": 9.926375172104795e-06, "loss": 0.3836, "step": 534 }, { "epoch": 0.448700027956388, "grad_norm": 0.5440535049524357, "learning_rate": 9.925538238196104e-06, "loss": 0.3797, "step": 535 }, { "epoch": 0.449538719597428, "grad_norm": 0.4762965160910607, "learning_rate": 9.924696609874074e-06, "loss": 0.3806, "step": 536 }, { "epoch": 0.450377411238468, "grad_norm": 0.4913630283284387, "learning_rate": 9.923850287940839e-06, "loss": 0.3985, "step": 537 }, { "epoch": 0.451216102879508, "grad_norm": 0.5186126490651013, "learning_rate": 9.922999273203008e-06, "loss": 0.3862, "step": 538 }, { "epoch": 0.4520547945205479, "grad_norm": 0.4296599759598784, "learning_rate": 9.922143566471664e-06, "loss": 0.3972, "step": 539 }, { "epoch": 0.4528934861615879, "grad_norm": 0.4902136300279318, "learning_rate": 9.921283168562361e-06, "loss": 0.3824, "step": 540 }, { "epoch": 0.4537321778026279, "grad_norm": 0.5260707503985257, "learning_rate": 9.920418080295125e-06, "loss": 0.3741, "step": 541 }, { "epoch": 0.45457086944366787, "grad_norm": 0.485384766722642, "learning_rate": 9.919548302494446e-06, "loss": 0.3898, "step": 542 }, { "epoch": 0.45540956108470787, "grad_norm": 0.508314766604945, "learning_rate": 9.918673835989294e-06, "loss": 0.4103, "step": 543 }, { "epoch": 0.4562482527257478, "grad_norm": 0.4608397427737612, "learning_rate": 9.9177946816131e-06, "loss": 0.3827, "step": 544 }, { "epoch": 0.4570869443667878, "grad_norm": 0.4999637307707687, "learning_rate": 9.916910840203763e-06, "loss": 0.3543, "step": 545 }, { "epoch": 0.45792563600782776, "grad_norm": 0.5452675291447736, "learning_rate": 9.916022312603657e-06, "loss": 0.3859, "step": 546 }, { "epoch": 0.45876432764886776, "grad_norm": 0.49892501044922494, "learning_rate": 9.91512909965961e-06, "loss": 0.407, "step": 547 }, { "epoch": 0.45960301928990777, "grad_norm": 0.5472631641830765, "learning_rate": 9.914231202222927e-06, "loss": 0.3847, "step": 548 }, { "epoch": 0.4604417109309477, "grad_norm": 0.4922160651467783, "learning_rate": 9.91332862114937e-06, "loss": 0.4145, "step": 549 }, { "epoch": 0.4612804025719877, "grad_norm": 0.5068560550094955, "learning_rate": 9.912421357299167e-06, "loss": 0.3915, "step": 550 }, { "epoch": 0.46211909421302766, "grad_norm": 0.5000372835754325, "learning_rate": 9.91150941153701e-06, "loss": 0.3876, "step": 551 }, { "epoch": 0.46295778585406766, "grad_norm": 0.4640217978935664, "learning_rate": 9.910592784732056e-06, "loss": 0.3973, "step": 552 }, { "epoch": 0.4637964774951076, "grad_norm": 0.5100960832180879, "learning_rate": 9.909671477757917e-06, "loss": 0.3843, "step": 553 }, { "epoch": 0.4646351691361476, "grad_norm": 0.5150385139291673, "learning_rate": 9.908745491492669e-06, "loss": 0.4017, "step": 554 }, { "epoch": 0.4654738607771876, "grad_norm": 0.5138756935624693, "learning_rate": 9.907814826818848e-06, "loss": 0.4021, "step": 555 }, { "epoch": 0.46631255241822755, "grad_norm": 0.5395725090917306, "learning_rate": 9.90687948462345e-06, "loss": 0.3712, "step": 556 }, { "epoch": 0.46715124405926756, "grad_norm": 0.4792887346610594, "learning_rate": 9.905939465797925e-06, "loss": 0.3696, "step": 557 }, { "epoch": 0.4679899357003075, "grad_norm": 0.46124297425745237, "learning_rate": 9.904994771238184e-06, "loss": 0.4036, "step": 558 }, { "epoch": 0.4688286273413475, "grad_norm": 0.46499799819499005, "learning_rate": 9.90404540184459e-06, "loss": 0.3745, "step": 559 }, { "epoch": 0.46966731898238745, "grad_norm": 0.5095832432377555, "learning_rate": 9.90309135852197e-06, "loss": 0.3782, "step": 560 }, { "epoch": 0.47050601062342745, "grad_norm": 0.4400691968075781, "learning_rate": 9.902132642179595e-06, "loss": 0.3905, "step": 561 }, { "epoch": 0.47134470226446745, "grad_norm": 0.4727896571192994, "learning_rate": 9.901169253731197e-06, "loss": 0.3785, "step": 562 }, { "epoch": 0.4721833939055074, "grad_norm": 0.46611130463625544, "learning_rate": 9.900201194094957e-06, "loss": 0.4101, "step": 563 }, { "epoch": 0.4730220855465474, "grad_norm": 0.4601465843285113, "learning_rate": 9.899228464193511e-06, "loss": 0.4115, "step": 564 }, { "epoch": 0.47386077718758735, "grad_norm": 0.49605781546502425, "learning_rate": 9.898251064953945e-06, "loss": 0.3777, "step": 565 }, { "epoch": 0.47469946882862735, "grad_norm": 0.48792236266147115, "learning_rate": 9.897268997307793e-06, "loss": 0.3927, "step": 566 }, { "epoch": 0.4755381604696673, "grad_norm": 0.48944629787516986, "learning_rate": 9.896282262191044e-06, "loss": 0.3897, "step": 567 }, { "epoch": 0.4763768521107073, "grad_norm": 0.45553452726367094, "learning_rate": 9.895290860544128e-06, "loss": 0.3835, "step": 568 }, { "epoch": 0.4772155437517473, "grad_norm": 0.4924645722920791, "learning_rate": 9.894294793311929e-06, "loss": 0.3995, "step": 569 }, { "epoch": 0.47805423539278724, "grad_norm": 0.49410721133645286, "learning_rate": 9.893294061443773e-06, "loss": 0.3823, "step": 570 }, { "epoch": 0.47889292703382724, "grad_norm": 0.47314017674204745, "learning_rate": 9.892288665893434e-06, "loss": 0.3946, "step": 571 }, { "epoch": 0.4797316186748672, "grad_norm": 0.4809982760145354, "learning_rate": 9.891278607619132e-06, "loss": 0.3567, "step": 572 }, { "epoch": 0.4805703103159072, "grad_norm": 0.4448857000501885, "learning_rate": 9.890263887583528e-06, "loss": 0.3856, "step": 573 }, { "epoch": 0.48140900195694714, "grad_norm": 0.4947378755391866, "learning_rate": 9.889244506753728e-06, "loss": 0.3822, "step": 574 }, { "epoch": 0.48224769359798714, "grad_norm": 0.5305814496092924, "learning_rate": 9.88822046610128e-06, "loss": 0.4068, "step": 575 }, { "epoch": 0.48308638523902714, "grad_norm": 0.43339519434967066, "learning_rate": 9.887191766602173e-06, "loss": 0.3704, "step": 576 }, { "epoch": 0.4839250768800671, "grad_norm": 0.42174112877441366, "learning_rate": 9.886158409236835e-06, "loss": 0.3994, "step": 577 }, { "epoch": 0.4847637685211071, "grad_norm": 0.58472985524457, "learning_rate": 9.885120394990135e-06, "loss": 0.3853, "step": 578 }, { "epoch": 0.48560246016214703, "grad_norm": 0.4462734871926013, "learning_rate": 9.88407772485138e-06, "loss": 0.3793, "step": 579 }, { "epoch": 0.48644115180318703, "grad_norm": 0.48815201109149153, "learning_rate": 9.883030399814314e-06, "loss": 0.3953, "step": 580 }, { "epoch": 0.487279843444227, "grad_norm": 0.45280779849669933, "learning_rate": 9.881978420877117e-06, "loss": 0.3778, "step": 581 }, { "epoch": 0.488118535085267, "grad_norm": 0.496608886111258, "learning_rate": 9.880921789042407e-06, "loss": 0.4017, "step": 582 }, { "epoch": 0.488957226726307, "grad_norm": 0.4940425780590836, "learning_rate": 9.87986050531723e-06, "loss": 0.44, "step": 583 }, { "epoch": 0.4897959183673469, "grad_norm": 0.5283834040638051, "learning_rate": 9.878794570713074e-06, "loss": 0.3904, "step": 584 }, { "epoch": 0.49063461000838693, "grad_norm": 0.5132312579386749, "learning_rate": 9.877723986245856e-06, "loss": 0.4242, "step": 585 }, { "epoch": 0.4914733016494269, "grad_norm": 0.5121354325780888, "learning_rate": 9.876648752935924e-06, "loss": 0.3918, "step": 586 }, { "epoch": 0.4923119932904669, "grad_norm": 0.4572724035046639, "learning_rate": 9.875568871808056e-06, "loss": 0.391, "step": 587 }, { "epoch": 0.4931506849315068, "grad_norm": 0.46328258031616554, "learning_rate": 9.874484343891465e-06, "loss": 0.381, "step": 588 }, { "epoch": 0.4939893765725468, "grad_norm": 0.500466985078596, "learning_rate": 9.873395170219784e-06, "loss": 0.4012, "step": 589 }, { "epoch": 0.4948280682135868, "grad_norm": 0.48076522088000656, "learning_rate": 9.872301351831079e-06, "loss": 0.364, "step": 590 }, { "epoch": 0.49566675985462677, "grad_norm": 0.5174907084029576, "learning_rate": 9.871202889767847e-06, "loss": 0.42, "step": 591 }, { "epoch": 0.49650545149566677, "grad_norm": 0.5512037640653978, "learning_rate": 9.870099785077001e-06, "loss": 0.3749, "step": 592 }, { "epoch": 0.4973441431367067, "grad_norm": 0.517818200258843, "learning_rate": 9.868992038809885e-06, "loss": 0.3847, "step": 593 }, { "epoch": 0.4981828347777467, "grad_norm": 0.44631103014126544, "learning_rate": 9.867879652022268e-06, "loss": 0.3813, "step": 594 }, { "epoch": 0.49902152641878667, "grad_norm": 0.5328142819481403, "learning_rate": 9.866762625774336e-06, "loss": 0.3842, "step": 595 }, { "epoch": 0.49986021805982667, "grad_norm": 0.5009039138920848, "learning_rate": 9.865640961130703e-06, "loss": 0.3924, "step": 596 }, { "epoch": 0.5006989097008666, "grad_norm": 0.47461075398001745, "learning_rate": 9.864514659160401e-06, "loss": 0.4036, "step": 597 }, { "epoch": 0.5015376013419066, "grad_norm": 0.4217367240938889, "learning_rate": 9.863383720936877e-06, "loss": 0.3786, "step": 598 }, { "epoch": 0.5023762929829466, "grad_norm": 0.4928097958079681, "learning_rate": 9.862248147538008e-06, "loss": 0.3755, "step": 599 }, { "epoch": 0.5032149846239866, "grad_norm": 0.46185717521131253, "learning_rate": 9.861107940046076e-06, "loss": 0.3709, "step": 600 }, { "epoch": 0.5040536762650265, "grad_norm": 0.5145402692643581, "learning_rate": 9.859963099547789e-06, "loss": 0.408, "step": 601 }, { "epoch": 0.5048923679060665, "grad_norm": 0.4893343862112022, "learning_rate": 9.858813627134267e-06, "loss": 0.3799, "step": 602 }, { "epoch": 0.5057310595471065, "grad_norm": 0.48920772278905456, "learning_rate": 9.857659523901043e-06, "loss": 0.393, "step": 603 }, { "epoch": 0.5065697511881465, "grad_norm": 0.4374726706857885, "learning_rate": 9.856500790948068e-06, "loss": 0.3672, "step": 604 }, { "epoch": 0.5074084428291865, "grad_norm": 0.47881428315500735, "learning_rate": 9.855337429379702e-06, "loss": 0.3555, "step": 605 }, { "epoch": 0.5082471344702264, "grad_norm": 0.4859810580098026, "learning_rate": 9.854169440304716e-06, "loss": 0.3857, "step": 606 }, { "epoch": 0.5090858261112664, "grad_norm": 0.44887338722001174, "learning_rate": 9.852996824836294e-06, "loss": 0.3833, "step": 607 }, { "epoch": 0.5099245177523064, "grad_norm": 0.45947730000336434, "learning_rate": 9.851819584092026e-06, "loss": 0.3692, "step": 608 }, { "epoch": 0.5107632093933464, "grad_norm": 0.49566876230093415, "learning_rate": 9.850637719193913e-06, "loss": 0.3631, "step": 609 }, { "epoch": 0.5116019010343864, "grad_norm": 0.5365232422685219, "learning_rate": 9.849451231268366e-06, "loss": 0.3606, "step": 610 }, { "epoch": 0.5124405926754263, "grad_norm": 0.4611147994783438, "learning_rate": 9.848260121446192e-06, "loss": 0.3676, "step": 611 }, { "epoch": 0.5132792843164663, "grad_norm": 0.5106895827766756, "learning_rate": 9.847064390862612e-06, "loss": 0.3837, "step": 612 }, { "epoch": 0.5141179759575063, "grad_norm": 0.47485035958160904, "learning_rate": 9.845864040657251e-06, "loss": 0.4002, "step": 613 }, { "epoch": 0.5149566675985463, "grad_norm": 0.4743693901051514, "learning_rate": 9.844659071974133e-06, "loss": 0.3916, "step": 614 }, { "epoch": 0.5157953592395862, "grad_norm": 0.476823856580873, "learning_rate": 9.843449485961683e-06, "loss": 0.371, "step": 615 }, { "epoch": 0.5166340508806262, "grad_norm": 0.42647575707772495, "learning_rate": 9.84223528377273e-06, "loss": 0.3765, "step": 616 }, { "epoch": 0.5174727425216662, "grad_norm": 0.46778797943999856, "learning_rate": 9.8410164665645e-06, "loss": 0.3649, "step": 617 }, { "epoch": 0.5183114341627062, "grad_norm": 0.43330437041788755, "learning_rate": 9.839793035498622e-06, "loss": 0.3953, "step": 618 }, { "epoch": 0.5191501258037462, "grad_norm": 0.4294338086051906, "learning_rate": 9.838564991741116e-06, "loss": 0.3885, "step": 619 }, { "epoch": 0.5199888174447861, "grad_norm": 0.4747986857464644, "learning_rate": 9.8373323364624e-06, "loss": 0.3785, "step": 620 }, { "epoch": 0.5208275090858261, "grad_norm": 0.4439512033823347, "learning_rate": 9.836095070837293e-06, "loss": 0.3645, "step": 621 }, { "epoch": 0.5216662007268661, "grad_norm": 0.4819598906995843, "learning_rate": 9.834853196045e-06, "loss": 0.3819, "step": 622 }, { "epoch": 0.5225048923679061, "grad_norm": 0.4434254910470358, "learning_rate": 9.833606713269123e-06, "loss": 0.3759, "step": 623 }, { "epoch": 0.5233435840089461, "grad_norm": 0.4376347661053822, "learning_rate": 9.832355623697657e-06, "loss": 0.3711, "step": 624 }, { "epoch": 0.524182275649986, "grad_norm": 0.4575390408171997, "learning_rate": 9.831099928522982e-06, "loss": 0.3813, "step": 625 }, { "epoch": 0.525020967291026, "grad_norm": 0.46722673998759134, "learning_rate": 9.829839628941876e-06, "loss": 0.3898, "step": 626 }, { "epoch": 0.525859658932066, "grad_norm": 0.46139769860266716, "learning_rate": 9.828574726155496e-06, "loss": 0.4106, "step": 627 }, { "epoch": 0.526698350573106, "grad_norm": 0.469628167780819, "learning_rate": 9.827305221369393e-06, "loss": 0.3647, "step": 628 }, { "epoch": 0.5275370422141459, "grad_norm": 0.4837250200099447, "learning_rate": 9.826031115793502e-06, "loss": 0.3833, "step": 629 }, { "epoch": 0.5283757338551859, "grad_norm": 0.47414212517330684, "learning_rate": 9.82475241064214e-06, "loss": 0.3815, "step": 630 }, { "epoch": 0.5292144254962259, "grad_norm": 0.45275537855941084, "learning_rate": 9.823469107134017e-06, "loss": 0.3712, "step": 631 }, { "epoch": 0.5300531171372659, "grad_norm": 0.49787121715130805, "learning_rate": 9.82218120649221e-06, "loss": 0.4275, "step": 632 }, { "epoch": 0.5308918087783059, "grad_norm": 0.49520002441007466, "learning_rate": 9.820888709944193e-06, "loss": 0.3709, "step": 633 }, { "epoch": 0.5317305004193458, "grad_norm": 0.44997229064325456, "learning_rate": 9.819591618721812e-06, "loss": 0.3716, "step": 634 }, { "epoch": 0.5325691920603858, "grad_norm": 0.41561395860173356, "learning_rate": 9.818289934061294e-06, "loss": 0.3626, "step": 635 }, { "epoch": 0.5334078837014258, "grad_norm": 0.5285014477529736, "learning_rate": 9.816983657203243e-06, "loss": 0.3875, "step": 636 }, { "epoch": 0.5342465753424658, "grad_norm": 0.45424378533417015, "learning_rate": 9.815672789392641e-06, "loss": 0.3732, "step": 637 }, { "epoch": 0.5350852669835058, "grad_norm": 0.45809888764214124, "learning_rate": 9.814357331878845e-06, "loss": 0.3907, "step": 638 }, { "epoch": 0.5359239586245457, "grad_norm": 0.4073762380998298, "learning_rate": 9.813037285915584e-06, "loss": 0.3829, "step": 639 }, { "epoch": 0.5367626502655857, "grad_norm": 0.47648455480432966, "learning_rate": 9.811712652760966e-06, "loss": 0.3767, "step": 640 }, { "epoch": 0.5376013419066257, "grad_norm": 0.48448937557395944, "learning_rate": 9.810383433677466e-06, "loss": 0.367, "step": 641 }, { "epoch": 0.5384400335476657, "grad_norm": 0.5619444083230545, "learning_rate": 9.809049629931931e-06, "loss": 0.3626, "step": 642 }, { "epoch": 0.5392787251887056, "grad_norm": 0.5210748924542891, "learning_rate": 9.807711242795577e-06, "loss": 0.3923, "step": 643 }, { "epoch": 0.5401174168297456, "grad_norm": 0.6035913824276836, "learning_rate": 9.806368273543989e-06, "loss": 0.3778, "step": 644 }, { "epoch": 0.5409561084707856, "grad_norm": 0.5254474755326347, "learning_rate": 9.80502072345712e-06, "loss": 0.4016, "step": 645 }, { "epoch": 0.5417948001118256, "grad_norm": 0.5360500101950905, "learning_rate": 9.803668593819286e-06, "loss": 0.387, "step": 646 }, { "epoch": 0.5426334917528656, "grad_norm": 0.528166002842789, "learning_rate": 9.80231188591917e-06, "loss": 0.3719, "step": 647 }, { "epoch": 0.5434721833939055, "grad_norm": 0.4565124698215295, "learning_rate": 9.800950601049824e-06, "loss": 0.3766, "step": 648 }, { "epoch": 0.5443108750349455, "grad_norm": 0.49928035409882615, "learning_rate": 9.799584740508647e-06, "loss": 0.3633, "step": 649 }, { "epoch": 0.5451495666759855, "grad_norm": 0.4785958661101844, "learning_rate": 9.798214305597413e-06, "loss": 0.399, "step": 650 }, { "epoch": 0.5459882583170255, "grad_norm": 0.5222920726151461, "learning_rate": 9.796839297622252e-06, "loss": 0.3812, "step": 651 }, { "epoch": 0.5468269499580655, "grad_norm": 0.4329737134796205, "learning_rate": 9.795459717893648e-06, "loss": 0.3797, "step": 652 }, { "epoch": 0.5476656415991054, "grad_norm": 0.5066247281822537, "learning_rate": 9.794075567726448e-06, "loss": 0.3819, "step": 653 }, { "epoch": 0.5485043332401454, "grad_norm": 0.567910970227195, "learning_rate": 9.792686848439852e-06, "loss": 0.3894, "step": 654 }, { "epoch": 0.5493430248811854, "grad_norm": 0.5332840242882676, "learning_rate": 9.791293561357416e-06, "loss": 0.3756, "step": 655 }, { "epoch": 0.5501817165222254, "grad_norm": 0.49257159944987094, "learning_rate": 9.789895707807047e-06, "loss": 0.4098, "step": 656 }, { "epoch": 0.5510204081632653, "grad_norm": 0.4770774813796676, "learning_rate": 9.788493289121009e-06, "loss": 0.4021, "step": 657 }, { "epoch": 0.5518590998043053, "grad_norm": 0.454541554565785, "learning_rate": 9.787086306635908e-06, "loss": 0.3724, "step": 658 }, { "epoch": 0.5526977914453453, "grad_norm": 0.4304960390467968, "learning_rate": 9.785674761692713e-06, "loss": 0.3806, "step": 659 }, { "epoch": 0.5535364830863853, "grad_norm": 0.4847405584658195, "learning_rate": 9.78425865563673e-06, "loss": 0.3793, "step": 660 }, { "epoch": 0.5543751747274253, "grad_norm": 0.5125558974739887, "learning_rate": 9.782837989817615e-06, "loss": 0.399, "step": 661 }, { "epoch": 0.5552138663684651, "grad_norm": 0.4485864369832923, "learning_rate": 9.781412765589373e-06, "loss": 0.3522, "step": 662 }, { "epoch": 0.5560525580095051, "grad_norm": 0.5311677286081121, "learning_rate": 9.77998298431035e-06, "loss": 0.389, "step": 663 }, { "epoch": 0.5568912496505452, "grad_norm": 0.554416005727139, "learning_rate": 9.778548647343239e-06, "loss": 0.3885, "step": 664 }, { "epoch": 0.5577299412915852, "grad_norm": 0.4233666665069327, "learning_rate": 9.777109756055067e-06, "loss": 0.3728, "step": 665 }, { "epoch": 0.5585686329326252, "grad_norm": 0.5392684473533712, "learning_rate": 9.775666311817213e-06, "loss": 0.3776, "step": 666 }, { "epoch": 0.559407324573665, "grad_norm": 0.4907550697057686, "learning_rate": 9.774218316005387e-06, "loss": 0.3971, "step": 667 }, { "epoch": 0.560246016214705, "grad_norm": 0.5371739027386699, "learning_rate": 9.772765769999637e-06, "loss": 0.4018, "step": 668 }, { "epoch": 0.561084707855745, "grad_norm": 0.4632496568062338, "learning_rate": 9.771308675184354e-06, "loss": 0.3648, "step": 669 }, { "epoch": 0.561923399496785, "grad_norm": 0.45151346564472894, "learning_rate": 9.769847032948258e-06, "loss": 0.3965, "step": 670 }, { "epoch": 0.5627620911378249, "grad_norm": 0.48540643999729727, "learning_rate": 9.76838084468441e-06, "loss": 0.3695, "step": 671 }, { "epoch": 0.5636007827788649, "grad_norm": 0.48889274536511995, "learning_rate": 9.76691011179019e-06, "loss": 0.3724, "step": 672 }, { "epoch": 0.5644394744199049, "grad_norm": 0.5246476353488331, "learning_rate": 9.765434835667326e-06, "loss": 0.3771, "step": 673 }, { "epoch": 0.5652781660609449, "grad_norm": 0.4873983651975064, "learning_rate": 9.763955017721868e-06, "loss": 0.3868, "step": 674 }, { "epoch": 0.5661168577019849, "grad_norm": 0.518435342796674, "learning_rate": 9.762470659364192e-06, "loss": 0.3693, "step": 675 }, { "epoch": 0.5669555493430248, "grad_norm": 0.4781699984522603, "learning_rate": 9.760981762009005e-06, "loss": 0.3942, "step": 676 }, { "epoch": 0.5677942409840648, "grad_norm": 0.5104541653944382, "learning_rate": 9.759488327075344e-06, "loss": 0.3853, "step": 677 }, { "epoch": 0.5686329326251048, "grad_norm": 0.6059016842334833, "learning_rate": 9.757990355986562e-06, "loss": 0.3892, "step": 678 }, { "epoch": 0.5694716242661448, "grad_norm": 0.48103152850017167, "learning_rate": 9.75648785017034e-06, "loss": 0.3892, "step": 679 }, { "epoch": 0.5703103159071848, "grad_norm": 0.5472846704043492, "learning_rate": 9.754980811058683e-06, "loss": 0.3757, "step": 680 }, { "epoch": 0.5711490075482247, "grad_norm": 0.4567535486220505, "learning_rate": 9.753469240087912e-06, "loss": 0.3718, "step": 681 }, { "epoch": 0.5719876991892647, "grad_norm": 0.5192177095014886, "learning_rate": 9.751953138698671e-06, "loss": 0.3705, "step": 682 }, { "epoch": 0.5728263908303047, "grad_norm": 0.5758266204002921, "learning_rate": 9.750432508335918e-06, "loss": 0.3919, "step": 683 }, { "epoch": 0.5736650824713447, "grad_norm": 0.44442012949636467, "learning_rate": 9.748907350448933e-06, "loss": 0.3839, "step": 684 }, { "epoch": 0.5745037741123847, "grad_norm": 0.518736454224958, "learning_rate": 9.747377666491303e-06, "loss": 0.3883, "step": 685 }, { "epoch": 0.5753424657534246, "grad_norm": 0.43630129598923917, "learning_rate": 9.745843457920938e-06, "loss": 0.3638, "step": 686 }, { "epoch": 0.5761811573944646, "grad_norm": 0.47907092353435554, "learning_rate": 9.744304726200052e-06, "loss": 0.3915, "step": 687 }, { "epoch": 0.5770198490355046, "grad_norm": 0.4633989172293345, "learning_rate": 9.742761472795174e-06, "loss": 0.3565, "step": 688 }, { "epoch": 0.5778585406765446, "grad_norm": 0.46296852004794564, "learning_rate": 9.741213699177147e-06, "loss": 0.3714, "step": 689 }, { "epoch": 0.5786972323175845, "grad_norm": 0.4884605239278814, "learning_rate": 9.73966140682111e-06, "loss": 0.3785, "step": 690 }, { "epoch": 0.5795359239586245, "grad_norm": 0.41911738942293525, "learning_rate": 9.738104597206522e-06, "loss": 0.3668, "step": 691 }, { "epoch": 0.5803746155996645, "grad_norm": 0.4800008699869477, "learning_rate": 9.736543271817138e-06, "loss": 0.3912, "step": 692 }, { "epoch": 0.5812133072407045, "grad_norm": 0.44345061497627064, "learning_rate": 9.73497743214102e-06, "loss": 0.3588, "step": 693 }, { "epoch": 0.5820519988817445, "grad_norm": 0.513031984967633, "learning_rate": 9.733407079670533e-06, "loss": 0.4207, "step": 694 }, { "epoch": 0.5828906905227844, "grad_norm": 0.40374941199375136, "learning_rate": 9.731832215902345e-06, "loss": 0.3793, "step": 695 }, { "epoch": 0.5837293821638244, "grad_norm": 0.4484075027190706, "learning_rate": 9.730252842337418e-06, "loss": 0.3635, "step": 696 }, { "epoch": 0.5845680738048644, "grad_norm": 0.5143673726866893, "learning_rate": 9.728668960481016e-06, "loss": 0.384, "step": 697 }, { "epoch": 0.5854067654459044, "grad_norm": 0.46760714853057045, "learning_rate": 9.7270805718427e-06, "loss": 0.3654, "step": 698 }, { "epoch": 0.5862454570869444, "grad_norm": 0.47826080024316103, "learning_rate": 9.725487677936326e-06, "loss": 0.3807, "step": 699 }, { "epoch": 0.5870841487279843, "grad_norm": 0.5000454423639735, "learning_rate": 9.723890280280042e-06, "loss": 0.3949, "step": 700 }, { "epoch": 0.5879228403690243, "grad_norm": 0.46765381713073556, "learning_rate": 9.72228838039629e-06, "loss": 0.3898, "step": 701 }, { "epoch": 0.5887615320100643, "grad_norm": 0.4133695392462358, "learning_rate": 9.7206819798118e-06, "loss": 0.3854, "step": 702 }, { "epoch": 0.5896002236511043, "grad_norm": 0.4051695333786614, "learning_rate": 9.7190710800576e-06, "loss": 0.3632, "step": 703 }, { "epoch": 0.5904389152921442, "grad_norm": 0.48781774685898366, "learning_rate": 9.717455682668997e-06, "loss": 0.3979, "step": 704 }, { "epoch": 0.5912776069331842, "grad_norm": 0.42963227635605394, "learning_rate": 9.715835789185588e-06, "loss": 0.3852, "step": 705 }, { "epoch": 0.5921162985742242, "grad_norm": 0.404428132782758, "learning_rate": 9.714211401151254e-06, "loss": 0.362, "step": 706 }, { "epoch": 0.5929549902152642, "grad_norm": 0.5312223615026297, "learning_rate": 9.712582520114162e-06, "loss": 0.384, "step": 707 }, { "epoch": 0.5937936818563042, "grad_norm": 0.4351579016156293, "learning_rate": 9.71094914762676e-06, "loss": 0.3647, "step": 708 }, { "epoch": 0.5946323734973441, "grad_norm": 0.4478225312320601, "learning_rate": 9.709311285245776e-06, "loss": 0.3724, "step": 709 }, { "epoch": 0.5954710651383841, "grad_norm": 0.48552364509787804, "learning_rate": 9.707668934532216e-06, "loss": 0.3781, "step": 710 }, { "epoch": 0.5963097567794241, "grad_norm": 0.48850025358141796, "learning_rate": 9.706022097051368e-06, "loss": 0.393, "step": 711 }, { "epoch": 0.5971484484204641, "grad_norm": 0.4174114831794437, "learning_rate": 9.704370774372792e-06, "loss": 0.3797, "step": 712 }, { "epoch": 0.5979871400615041, "grad_norm": 0.43003077838264886, "learning_rate": 9.702714968070324e-06, "loss": 0.3769, "step": 713 }, { "epoch": 0.598825831702544, "grad_norm": 0.4230978226933975, "learning_rate": 9.701054679722075e-06, "loss": 0.3843, "step": 714 }, { "epoch": 0.599664523343584, "grad_norm": 0.4071015423733068, "learning_rate": 9.699389910910426e-06, "loss": 0.3743, "step": 715 }, { "epoch": 0.600503214984624, "grad_norm": 0.5425223125596675, "learning_rate": 9.69772066322203e-06, "loss": 0.3898, "step": 716 }, { "epoch": 0.601341906625664, "grad_norm": 0.5225478722682363, "learning_rate": 9.696046938247804e-06, "loss": 0.4015, "step": 717 }, { "epoch": 0.6021805982667039, "grad_norm": 0.42659242924041046, "learning_rate": 9.694368737582937e-06, "loss": 0.3813, "step": 718 }, { "epoch": 0.6030192899077439, "grad_norm": 0.5176527172686368, "learning_rate": 9.692686062826883e-06, "loss": 0.3947, "step": 719 }, { "epoch": 0.6038579815487839, "grad_norm": 0.47116845406812946, "learning_rate": 9.690998915583358e-06, "loss": 0.3791, "step": 720 }, { "epoch": 0.6046966731898239, "grad_norm": 0.41552094264306677, "learning_rate": 9.689307297460342e-06, "loss": 0.3818, "step": 721 }, { "epoch": 0.6055353648308639, "grad_norm": 0.4397285437558784, "learning_rate": 9.687611210070078e-06, "loss": 0.3697, "step": 722 }, { "epoch": 0.6063740564719038, "grad_norm": 0.4565950140305153, "learning_rate": 9.685910655029063e-06, "loss": 0.3619, "step": 723 }, { "epoch": 0.6072127481129438, "grad_norm": 0.4605955175293516, "learning_rate": 9.684205633958059e-06, "loss": 0.377, "step": 724 }, { "epoch": 0.6080514397539838, "grad_norm": 0.4306931250118525, "learning_rate": 9.682496148482079e-06, "loss": 0.3748, "step": 725 }, { "epoch": 0.6088901313950238, "grad_norm": 0.44713780703458, "learning_rate": 9.680782200230394e-06, "loss": 0.3803, "step": 726 }, { "epoch": 0.6097288230360638, "grad_norm": 0.4225025706206792, "learning_rate": 9.679063790836527e-06, "loss": 0.4002, "step": 727 }, { "epoch": 0.6105675146771037, "grad_norm": 0.4325718157569027, "learning_rate": 9.677340921938252e-06, "loss": 0.3982, "step": 728 }, { "epoch": 0.6114062063181437, "grad_norm": 0.42695724648217137, "learning_rate": 9.675613595177595e-06, "loss": 0.3658, "step": 729 }, { "epoch": 0.6122448979591837, "grad_norm": 0.431313910613093, "learning_rate": 9.673881812200832e-06, "loss": 0.3814, "step": 730 }, { "epoch": 0.6130835896002237, "grad_norm": 0.4435583235514682, "learning_rate": 9.67214557465848e-06, "loss": 0.3793, "step": 731 }, { "epoch": 0.6139222812412636, "grad_norm": 0.4058546926204105, "learning_rate": 9.67040488420531e-06, "loss": 0.3819, "step": 732 }, { "epoch": 0.6147609728823036, "grad_norm": 0.47635458890342636, "learning_rate": 9.668659742500332e-06, "loss": 0.3709, "step": 733 }, { "epoch": 0.6155996645233436, "grad_norm": 0.44091943798426003, "learning_rate": 9.666910151206797e-06, "loss": 0.3842, "step": 734 }, { "epoch": 0.6164383561643836, "grad_norm": 0.4564380630069508, "learning_rate": 9.665156111992199e-06, "loss": 0.3775, "step": 735 }, { "epoch": 0.6172770478054236, "grad_norm": 0.436392201173797, "learning_rate": 9.663397626528272e-06, "loss": 0.3649, "step": 736 }, { "epoch": 0.6181157394464635, "grad_norm": 0.4247840015690318, "learning_rate": 9.661634696490988e-06, "loss": 0.3598, "step": 737 }, { "epoch": 0.6189544310875035, "grad_norm": 0.48788906035733975, "learning_rate": 9.659867323560552e-06, "loss": 0.381, "step": 738 }, { "epoch": 0.6197931227285435, "grad_norm": 0.4578525333667949, "learning_rate": 9.658095509421403e-06, "loss": 0.3697, "step": 739 }, { "epoch": 0.6206318143695835, "grad_norm": 0.4309263753359277, "learning_rate": 9.656319255762218e-06, "loss": 0.359, "step": 740 }, { "epoch": 0.6214705060106235, "grad_norm": 0.5379495323389318, "learning_rate": 9.654538564275903e-06, "loss": 0.4163, "step": 741 }, { "epoch": 0.6223091976516634, "grad_norm": 0.5236189165687949, "learning_rate": 9.65275343665959e-06, "loss": 0.3741, "step": 742 }, { "epoch": 0.6231478892927034, "grad_norm": 0.4804947985864161, "learning_rate": 9.650963874614644e-06, "loss": 0.3914, "step": 743 }, { "epoch": 0.6239865809337434, "grad_norm": 0.6103236890879086, "learning_rate": 9.649169879846652e-06, "loss": 0.3804, "step": 744 }, { "epoch": 0.6248252725747834, "grad_norm": 0.4862737302813073, "learning_rate": 9.64737145406543e-06, "loss": 0.3729, "step": 745 }, { "epoch": 0.6256639642158233, "grad_norm": 0.6026957520156013, "learning_rate": 9.645568598985009e-06, "loss": 0.3853, "step": 746 }, { "epoch": 0.6265026558568633, "grad_norm": 0.5331503426622405, "learning_rate": 9.643761316323654e-06, "loss": 0.3838, "step": 747 }, { "epoch": 0.6273413474979033, "grad_norm": 0.4670576590498335, "learning_rate": 9.641949607803842e-06, "loss": 0.4074, "step": 748 }, { "epoch": 0.6281800391389433, "grad_norm": 0.4877081811753982, "learning_rate": 9.640133475152268e-06, "loss": 0.3675, "step": 749 }, { "epoch": 0.6290187307799833, "grad_norm": 0.4536643066282817, "learning_rate": 9.638312920099842e-06, "loss": 0.372, "step": 750 }, { "epoch": 0.6298574224210232, "grad_norm": 0.5308598782748314, "learning_rate": 9.636487944381696e-06, "loss": 0.3972, "step": 751 }, { "epoch": 0.6306961140620632, "grad_norm": 0.45312306247352757, "learning_rate": 9.63465854973717e-06, "loss": 0.4075, "step": 752 }, { "epoch": 0.6315348057031032, "grad_norm": 0.42099934355048607, "learning_rate": 9.632824737909816e-06, "loss": 0.3576, "step": 753 }, { "epoch": 0.6323734973441432, "grad_norm": 0.47676519552651875, "learning_rate": 9.630986510647398e-06, "loss": 0.3902, "step": 754 }, { "epoch": 0.6332121889851832, "grad_norm": 0.4374897702307618, "learning_rate": 9.629143869701882e-06, "loss": 0.3882, "step": 755 }, { "epoch": 0.6340508806262231, "grad_norm": 0.42718658727684533, "learning_rate": 9.62729681682945e-06, "loss": 0.3505, "step": 756 }, { "epoch": 0.6348895722672631, "grad_norm": 0.4481664135374851, "learning_rate": 9.625445353790484e-06, "loss": 0.3717, "step": 757 }, { "epoch": 0.6357282639083031, "grad_norm": 0.41301542981693884, "learning_rate": 9.623589482349567e-06, "loss": 0.3834, "step": 758 }, { "epoch": 0.6365669555493431, "grad_norm": 0.48420716708843914, "learning_rate": 9.621729204275486e-06, "loss": 0.371, "step": 759 }, { "epoch": 0.637405647190383, "grad_norm": 0.4191679437564546, "learning_rate": 9.61986452134123e-06, "loss": 0.381, "step": 760 }, { "epoch": 0.638244338831423, "grad_norm": 0.4198125291706119, "learning_rate": 9.61799543532398e-06, "loss": 0.3708, "step": 761 }, { "epoch": 0.639083030472463, "grad_norm": 0.4462243793068257, "learning_rate": 9.616121948005124e-06, "loss": 0.3773, "step": 762 }, { "epoch": 0.639921722113503, "grad_norm": 0.43435533474708354, "learning_rate": 9.614244061170233e-06, "loss": 0.3548, "step": 763 }, { "epoch": 0.640760413754543, "grad_norm": 0.46065363329696457, "learning_rate": 9.612361776609076e-06, "loss": 0.3785, "step": 764 }, { "epoch": 0.6415991053955828, "grad_norm": 0.41872222980196416, "learning_rate": 9.610475096115617e-06, "loss": 0.377, "step": 765 }, { "epoch": 0.6424377970366228, "grad_norm": 0.39921502792622426, "learning_rate": 9.608584021488004e-06, "loss": 0.3322, "step": 766 }, { "epoch": 0.6432764886776628, "grad_norm": 0.4560682300084397, "learning_rate": 9.606688554528574e-06, "loss": 0.3985, "step": 767 }, { "epoch": 0.6441151803187029, "grad_norm": 0.42833007409224577, "learning_rate": 9.604788697043855e-06, "loss": 0.3522, "step": 768 }, { "epoch": 0.6449538719597429, "grad_norm": 0.3998577416917513, "learning_rate": 9.602884450844554e-06, "loss": 0.3752, "step": 769 }, { "epoch": 0.6457925636007827, "grad_norm": 0.4823715984703615, "learning_rate": 9.600975817745562e-06, "loss": 0.3732, "step": 770 }, { "epoch": 0.6466312552418227, "grad_norm": 0.4606162522710604, "learning_rate": 9.599062799565955e-06, "loss": 0.3939, "step": 771 }, { "epoch": 0.6474699468828627, "grad_norm": 0.45632623269937855, "learning_rate": 9.597145398128982e-06, "loss": 0.3714, "step": 772 }, { "epoch": 0.6483086385239027, "grad_norm": 0.45216977480332676, "learning_rate": 9.595223615262072e-06, "loss": 0.364, "step": 773 }, { "epoch": 0.6491473301649426, "grad_norm": 0.36547581619922964, "learning_rate": 9.593297452796832e-06, "loss": 0.3864, "step": 774 }, { "epoch": 0.6499860218059826, "grad_norm": 0.4971023886649067, "learning_rate": 9.591366912569045e-06, "loss": 0.3706, "step": 775 }, { "epoch": 0.6508247134470226, "grad_norm": 0.41431857455060445, "learning_rate": 9.589431996418656e-06, "loss": 0.376, "step": 776 }, { "epoch": 0.6516634050880626, "grad_norm": 0.41982170983696426, "learning_rate": 9.587492706189794e-06, "loss": 0.3882, "step": 777 }, { "epoch": 0.6525020967291026, "grad_norm": 0.44793380670310173, "learning_rate": 9.585549043730748e-06, "loss": 0.3679, "step": 778 }, { "epoch": 0.6533407883701425, "grad_norm": 0.42304675639897876, "learning_rate": 9.583601010893976e-06, "loss": 0.3855, "step": 779 }, { "epoch": 0.6541794800111825, "grad_norm": 0.3724405918930644, "learning_rate": 9.581648609536098e-06, "loss": 0.3708, "step": 780 }, { "epoch": 0.6550181716522225, "grad_norm": 0.4218225127452727, "learning_rate": 9.57969184151791e-06, "loss": 0.3823, "step": 781 }, { "epoch": 0.6558568632932625, "grad_norm": 0.43294850284799474, "learning_rate": 9.577730708704354e-06, "loss": 0.4007, "step": 782 }, { "epoch": 0.6566955549343025, "grad_norm": 0.4143912218149178, "learning_rate": 9.575765212964542e-06, "loss": 0.3902, "step": 783 }, { "epoch": 0.6575342465753424, "grad_norm": 0.3957630684231553, "learning_rate": 9.57379535617174e-06, "loss": 0.3741, "step": 784 }, { "epoch": 0.6583729382163824, "grad_norm": 0.3865868195125538, "learning_rate": 9.571821140203373e-06, "loss": 0.3558, "step": 785 }, { "epoch": 0.6592116298574224, "grad_norm": 0.417418012336664, "learning_rate": 9.569842566941018e-06, "loss": 0.3491, "step": 786 }, { "epoch": 0.6600503214984624, "grad_norm": 0.43675727970235295, "learning_rate": 9.567859638270407e-06, "loss": 0.3805, "step": 787 }, { "epoch": 0.6608890131395023, "grad_norm": 0.41187600662448975, "learning_rate": 9.56587235608142e-06, "loss": 0.3548, "step": 788 }, { "epoch": 0.6617277047805423, "grad_norm": 0.4296414308332552, "learning_rate": 9.563880722268093e-06, "loss": 0.3947, "step": 789 }, { "epoch": 0.6625663964215823, "grad_norm": 0.46721570820762204, "learning_rate": 9.5618847387286e-06, "loss": 0.3673, "step": 790 }, { "epoch": 0.6634050880626223, "grad_norm": 0.3991451040279652, "learning_rate": 9.559884407365267e-06, "loss": 0.3685, "step": 791 }, { "epoch": 0.6642437797036623, "grad_norm": 0.4402093905501178, "learning_rate": 9.557879730084565e-06, "loss": 0.3578, "step": 792 }, { "epoch": 0.6650824713447022, "grad_norm": 0.4261020629629312, "learning_rate": 9.5558707087971e-06, "loss": 0.3687, "step": 793 }, { "epoch": 0.6659211629857422, "grad_norm": 0.4259078397844864, "learning_rate": 9.553857345417626e-06, "loss": 0.39, "step": 794 }, { "epoch": 0.6667598546267822, "grad_norm": 0.4317883492123596, "learning_rate": 9.551839641865029e-06, "loss": 0.3897, "step": 795 }, { "epoch": 0.6675985462678222, "grad_norm": 0.3937654583960187, "learning_rate": 9.549817600062334e-06, "loss": 0.3602, "step": 796 }, { "epoch": 0.6684372379088622, "grad_norm": 0.41534347671890404, "learning_rate": 9.547791221936704e-06, "loss": 0.3922, "step": 797 }, { "epoch": 0.6692759295499021, "grad_norm": 0.3977611474397575, "learning_rate": 9.545760509419428e-06, "loss": 0.3588, "step": 798 }, { "epoch": 0.6701146211909421, "grad_norm": 0.38776887279242955, "learning_rate": 9.543725464445934e-06, "loss": 0.3776, "step": 799 }, { "epoch": 0.6709533128319821, "grad_norm": 0.43414691582901654, "learning_rate": 9.541686088955772e-06, "loss": 0.3927, "step": 800 }, { "epoch": 0.6717920044730221, "grad_norm": 0.4026148020514343, "learning_rate": 9.539642384892622e-06, "loss": 0.3678, "step": 801 }, { "epoch": 0.672630696114062, "grad_norm": 0.3744687465917873, "learning_rate": 9.537594354204293e-06, "loss": 0.372, "step": 802 }, { "epoch": 0.673469387755102, "grad_norm": 0.4017018532822156, "learning_rate": 9.535541998842711e-06, "loss": 0.3682, "step": 803 }, { "epoch": 0.674308079396142, "grad_norm": 0.40768365645416343, "learning_rate": 9.533485320763928e-06, "loss": 0.3752, "step": 804 }, { "epoch": 0.675146771037182, "grad_norm": 0.44393176522415606, "learning_rate": 9.531424321928115e-06, "loss": 0.3622, "step": 805 }, { "epoch": 0.675985462678222, "grad_norm": 0.45112086868170154, "learning_rate": 9.529359004299563e-06, "loss": 0.394, "step": 806 }, { "epoch": 0.6768241543192619, "grad_norm": 0.3910242096809098, "learning_rate": 9.527289369846675e-06, "loss": 0.3782, "step": 807 }, { "epoch": 0.6776628459603019, "grad_norm": 0.43359942507784693, "learning_rate": 9.52521542054197e-06, "loss": 0.3711, "step": 808 }, { "epoch": 0.6785015376013419, "grad_norm": 0.4840626610254273, "learning_rate": 9.52313715836208e-06, "loss": 0.3756, "step": 809 }, { "epoch": 0.6793402292423819, "grad_norm": 0.36855437830540266, "learning_rate": 9.521054585287746e-06, "loss": 0.3666, "step": 810 }, { "epoch": 0.6801789208834219, "grad_norm": 0.42605763597737184, "learning_rate": 9.518967703303822e-06, "loss": 0.3924, "step": 811 }, { "epoch": 0.6810176125244618, "grad_norm": 0.4440228660894276, "learning_rate": 9.516876514399265e-06, "loss": 0.3816, "step": 812 }, { "epoch": 0.6818563041655018, "grad_norm": 0.4378127116137599, "learning_rate": 9.514781020567134e-06, "loss": 0.3794, "step": 813 }, { "epoch": 0.6826949958065418, "grad_norm": 0.43485186407968773, "learning_rate": 9.5126812238046e-06, "loss": 0.3574, "step": 814 }, { "epoch": 0.6835336874475818, "grad_norm": 0.46231382632205464, "learning_rate": 9.510577126112925e-06, "loss": 0.3772, "step": 815 }, { "epoch": 0.6843723790886217, "grad_norm": 0.3796830783633279, "learning_rate": 9.508468729497476e-06, "loss": 0.3679, "step": 816 }, { "epoch": 0.6852110707296617, "grad_norm": 0.41585540790564784, "learning_rate": 9.506356035967713e-06, "loss": 0.362, "step": 817 }, { "epoch": 0.6860497623707017, "grad_norm": 0.4462990110725754, "learning_rate": 9.504239047537198e-06, "loss": 0.3486, "step": 818 }, { "epoch": 0.6868884540117417, "grad_norm": 0.38685700522908284, "learning_rate": 9.50211776622358e-06, "loss": 0.3726, "step": 819 }, { "epoch": 0.6877271456527817, "grad_norm": 0.4378272246673451, "learning_rate": 9.499992194048604e-06, "loss": 0.3715, "step": 820 }, { "epoch": 0.6885658372938216, "grad_norm": 0.4484822849255835, "learning_rate": 9.497862333038098e-06, "loss": 0.3821, "step": 821 }, { "epoch": 0.6894045289348616, "grad_norm": 0.4560882362192658, "learning_rate": 9.495728185221987e-06, "loss": 0.3798, "step": 822 }, { "epoch": 0.6902432205759016, "grad_norm": 0.4271796569057093, "learning_rate": 9.493589752634273e-06, "loss": 0.3643, "step": 823 }, { "epoch": 0.6910819122169416, "grad_norm": 0.4723583482925718, "learning_rate": 9.491447037313047e-06, "loss": 0.3861, "step": 824 }, { "epoch": 0.6919206038579816, "grad_norm": 0.4288525323014672, "learning_rate": 9.489300041300477e-06, "loss": 0.3628, "step": 825 }, { "epoch": 0.6927592954990215, "grad_norm": 0.46647103461567024, "learning_rate": 9.487148766642818e-06, "loss": 0.3768, "step": 826 }, { "epoch": 0.6935979871400615, "grad_norm": 0.4183901577635097, "learning_rate": 9.484993215390396e-06, "loss": 0.3836, "step": 827 }, { "epoch": 0.6944366787811015, "grad_norm": 0.4638188578369257, "learning_rate": 9.482833389597617e-06, "loss": 0.3967, "step": 828 }, { "epoch": 0.6952753704221415, "grad_norm": 0.42908824248877725, "learning_rate": 9.48066929132296e-06, "loss": 0.3727, "step": 829 }, { "epoch": 0.6961140620631814, "grad_norm": 0.4615418442120385, "learning_rate": 9.478500922628973e-06, "loss": 0.3985, "step": 830 }, { "epoch": 0.6969527537042214, "grad_norm": 0.44861771693333413, "learning_rate": 9.476328285582277e-06, "loss": 0.3779, "step": 831 }, { "epoch": 0.6977914453452614, "grad_norm": 0.42957546859021933, "learning_rate": 9.474151382253564e-06, "loss": 0.3654, "step": 832 }, { "epoch": 0.6986301369863014, "grad_norm": 0.44964935029471825, "learning_rate": 9.471970214717587e-06, "loss": 0.4104, "step": 833 }, { "epoch": 0.6994688286273414, "grad_norm": 0.4406527908009871, "learning_rate": 9.469784785053163e-06, "loss": 0.3609, "step": 834 }, { "epoch": 0.7003075202683813, "grad_norm": 0.45693755593696467, "learning_rate": 9.467595095343176e-06, "loss": 0.396, "step": 835 }, { "epoch": 0.7011462119094213, "grad_norm": 0.4521174588983374, "learning_rate": 9.465401147674565e-06, "loss": 0.3959, "step": 836 }, { "epoch": 0.7019849035504613, "grad_norm": 0.5314504962620601, "learning_rate": 9.46320294413833e-06, "loss": 0.392, "step": 837 }, { "epoch": 0.7028235951915013, "grad_norm": 0.45206361447306914, "learning_rate": 9.461000486829528e-06, "loss": 0.3885, "step": 838 }, { "epoch": 0.7036622868325413, "grad_norm": 0.4597520857008153, "learning_rate": 9.458793777847266e-06, "loss": 0.3697, "step": 839 }, { "epoch": 0.7045009784735812, "grad_norm": 0.5436624763894881, "learning_rate": 9.456582819294708e-06, "loss": 0.3849, "step": 840 }, { "epoch": 0.7053396701146212, "grad_norm": 0.45253477144728194, "learning_rate": 9.454367613279066e-06, "loss": 0.3941, "step": 841 }, { "epoch": 0.7061783617556612, "grad_norm": 0.41784901800913266, "learning_rate": 9.452148161911597e-06, "loss": 0.3697, "step": 842 }, { "epoch": 0.7070170533967012, "grad_norm": 0.5086520214417627, "learning_rate": 9.449924467307613e-06, "loss": 0.3684, "step": 843 }, { "epoch": 0.7078557450377411, "grad_norm": 0.4390760079992227, "learning_rate": 9.447696531586458e-06, "loss": 0.3677, "step": 844 }, { "epoch": 0.7086944366787811, "grad_norm": 0.4383384946040457, "learning_rate": 9.445464356871528e-06, "loss": 0.3628, "step": 845 }, { "epoch": 0.7095331283198211, "grad_norm": 0.4558739659355394, "learning_rate": 9.443227945290257e-06, "loss": 0.3798, "step": 846 }, { "epoch": 0.7103718199608611, "grad_norm": 0.497792022477278, "learning_rate": 9.440987298974112e-06, "loss": 0.3694, "step": 847 }, { "epoch": 0.7112105116019011, "grad_norm": 0.3940644025799204, "learning_rate": 9.438742420058604e-06, "loss": 0.3882, "step": 848 }, { "epoch": 0.712049203242941, "grad_norm": 0.40144093107560747, "learning_rate": 9.436493310683268e-06, "loss": 0.3564, "step": 849 }, { "epoch": 0.712887894883981, "grad_norm": 0.4834281721991978, "learning_rate": 9.434239972991683e-06, "loss": 0.3902, "step": 850 }, { "epoch": 0.713726586525021, "grad_norm": 0.4341550240616742, "learning_rate": 9.431982409131448e-06, "loss": 0.3727, "step": 851 }, { "epoch": 0.714565278166061, "grad_norm": 0.48839014917100504, "learning_rate": 9.429720621254194e-06, "loss": 0.3641, "step": 852 }, { "epoch": 0.715403969807101, "grad_norm": 0.42866234975815715, "learning_rate": 9.427454611515577e-06, "loss": 0.3754, "step": 853 }, { "epoch": 0.7162426614481409, "grad_norm": 0.4899344308316618, "learning_rate": 9.425184382075277e-06, "loss": 0.3852, "step": 854 }, { "epoch": 0.7170813530891809, "grad_norm": 0.47315021256675, "learning_rate": 9.422909935096995e-06, "loss": 0.4048, "step": 855 }, { "epoch": 0.7179200447302209, "grad_norm": 0.5249484014783807, "learning_rate": 9.420631272748455e-06, "loss": 0.3811, "step": 856 }, { "epoch": 0.7187587363712609, "grad_norm": 0.42736437704141095, "learning_rate": 9.418348397201393e-06, "loss": 0.3718, "step": 857 }, { "epoch": 0.7195974280123009, "grad_norm": 0.4129076942247388, "learning_rate": 9.416061310631566e-06, "loss": 0.3596, "step": 858 }, { "epoch": 0.7204361196533408, "grad_norm": 0.39705992536152546, "learning_rate": 9.413770015218738e-06, "loss": 0.3742, "step": 859 }, { "epoch": 0.7212748112943808, "grad_norm": 0.42907632647731847, "learning_rate": 9.411474513146691e-06, "loss": 0.3645, "step": 860 }, { "epoch": 0.7221135029354208, "grad_norm": 0.47382276582450605, "learning_rate": 9.409174806603214e-06, "loss": 0.3663, "step": 861 }, { "epoch": 0.7229521945764608, "grad_norm": 0.4287649604778712, "learning_rate": 9.406870897780102e-06, "loss": 0.3743, "step": 862 }, { "epoch": 0.7237908862175007, "grad_norm": 0.4150295790806263, "learning_rate": 9.404562788873154e-06, "loss": 0.3548, "step": 863 }, { "epoch": 0.7246295778585407, "grad_norm": 0.4811995273707131, "learning_rate": 9.402250482082174e-06, "loss": 0.3705, "step": 864 }, { "epoch": 0.7254682694995807, "grad_norm": 0.42494356805684536, "learning_rate": 9.39993397961097e-06, "loss": 0.3546, "step": 865 }, { "epoch": 0.7263069611406207, "grad_norm": 0.40343084910427685, "learning_rate": 9.397613283667341e-06, "loss": 0.3593, "step": 866 }, { "epoch": 0.7271456527816607, "grad_norm": 0.4177274651825897, "learning_rate": 9.395288396463088e-06, "loss": 0.3523, "step": 867 }, { "epoch": 0.7279843444227005, "grad_norm": 0.45177873684245806, "learning_rate": 9.39295932021401e-06, "loss": 0.3875, "step": 868 }, { "epoch": 0.7288230360637405, "grad_norm": 0.42137462409913173, "learning_rate": 9.39062605713989e-06, "loss": 0.3748, "step": 869 }, { "epoch": 0.7296617277047805, "grad_norm": 0.4133036671394201, "learning_rate": 9.388288609464504e-06, "loss": 0.3693, "step": 870 }, { "epoch": 0.7305004193458206, "grad_norm": 0.4364524858044216, "learning_rate": 9.385946979415622e-06, "loss": 0.3755, "step": 871 }, { "epoch": 0.7313391109868606, "grad_norm": 0.463682220001794, "learning_rate": 9.383601169224995e-06, "loss": 0.3819, "step": 872 }, { "epoch": 0.7321778026279004, "grad_norm": 0.4667877438211276, "learning_rate": 9.381251181128355e-06, "loss": 0.365, "step": 873 }, { "epoch": 0.7330164942689404, "grad_norm": 0.4072604374891933, "learning_rate": 9.378897017365425e-06, "loss": 0.3731, "step": 874 }, { "epoch": 0.7338551859099804, "grad_norm": 0.5337602900018122, "learning_rate": 9.376538680179901e-06, "loss": 0.3771, "step": 875 }, { "epoch": 0.7346938775510204, "grad_norm": 0.45410340900663976, "learning_rate": 9.374176171819456e-06, "loss": 0.3571, "step": 876 }, { "epoch": 0.7355325691920603, "grad_norm": 0.4279011643198671, "learning_rate": 9.371809494535741e-06, "loss": 0.3831, "step": 877 }, { "epoch": 0.7363712608331003, "grad_norm": 0.4952792807258405, "learning_rate": 9.369438650584383e-06, "loss": 0.355, "step": 878 }, { "epoch": 0.7372099524741403, "grad_norm": 0.430268877764739, "learning_rate": 9.367063642224972e-06, "loss": 0.3704, "step": 879 }, { "epoch": 0.7380486441151803, "grad_norm": 0.4752188289569148, "learning_rate": 9.364684471721078e-06, "loss": 0.3882, "step": 880 }, { "epoch": 0.7388873357562203, "grad_norm": 0.44013814001561363, "learning_rate": 9.362301141340225e-06, "loss": 0.3667, "step": 881 }, { "epoch": 0.7397260273972602, "grad_norm": 0.43962585509985036, "learning_rate": 9.359913653353914e-06, "loss": 0.3569, "step": 882 }, { "epoch": 0.7405647190383002, "grad_norm": 0.4793920420322088, "learning_rate": 9.357522010037601e-06, "loss": 0.367, "step": 883 }, { "epoch": 0.7414034106793402, "grad_norm": 0.4064541076178831, "learning_rate": 9.355126213670704e-06, "loss": 0.3788, "step": 884 }, { "epoch": 0.7422421023203802, "grad_norm": 0.4534801785991132, "learning_rate": 9.352726266536598e-06, "loss": 0.3722, "step": 885 }, { "epoch": 0.7430807939614202, "grad_norm": 0.5030929360596512, "learning_rate": 9.35032217092262e-06, "loss": 0.3566, "step": 886 }, { "epoch": 0.7439194856024601, "grad_norm": 0.38045744052968095, "learning_rate": 9.347913929120053e-06, "loss": 0.3788, "step": 887 }, { "epoch": 0.7447581772435001, "grad_norm": 0.408032998687664, "learning_rate": 9.345501543424136e-06, "loss": 0.3615, "step": 888 }, { "epoch": 0.7455968688845401, "grad_norm": 0.4143641178453451, "learning_rate": 9.343085016134054e-06, "loss": 0.3598, "step": 889 }, { "epoch": 0.7464355605255801, "grad_norm": 0.41773143611944336, "learning_rate": 9.340664349552945e-06, "loss": 0.3813, "step": 890 }, { "epoch": 0.74727425216662, "grad_norm": 0.3968621831911624, "learning_rate": 9.33823954598789e-06, "loss": 0.363, "step": 891 }, { "epoch": 0.74811294380766, "grad_norm": 0.4639651238189151, "learning_rate": 9.335810607749906e-06, "loss": 0.3554, "step": 892 }, { "epoch": 0.7489516354487, "grad_norm": 0.4155122548296799, "learning_rate": 9.333377537153963e-06, "loss": 0.3684, "step": 893 }, { "epoch": 0.74979032708974, "grad_norm": 0.4260007159187362, "learning_rate": 9.330940336518956e-06, "loss": 0.3606, "step": 894 }, { "epoch": 0.75062901873078, "grad_norm": 0.4426051576898815, "learning_rate": 9.328499008167726e-06, "loss": 0.3833, "step": 895 }, { "epoch": 0.7514677103718199, "grad_norm": 0.5167766444887606, "learning_rate": 9.326053554427047e-06, "loss": 0.3662, "step": 896 }, { "epoch": 0.7523064020128599, "grad_norm": 0.3793623401702071, "learning_rate": 9.32360397762762e-06, "loss": 0.3426, "step": 897 }, { "epoch": 0.7531450936538999, "grad_norm": 0.4608813081336728, "learning_rate": 9.321150280104078e-06, "loss": 0.38, "step": 898 }, { "epoch": 0.7539837852949399, "grad_norm": 0.42775611855610546, "learning_rate": 9.318692464194984e-06, "loss": 0.3732, "step": 899 }, { "epoch": 0.7548224769359799, "grad_norm": 0.39444320285172096, "learning_rate": 9.316230532242825e-06, "loss": 0.3669, "step": 900 }, { "epoch": 0.7556611685770198, "grad_norm": 0.39468146733927195, "learning_rate": 9.313764486594006e-06, "loss": 0.3655, "step": 901 }, { "epoch": 0.7564998602180598, "grad_norm": 0.4359218804160314, "learning_rate": 9.311294329598858e-06, "loss": 0.3843, "step": 902 }, { "epoch": 0.7573385518590998, "grad_norm": 0.4624062351001727, "learning_rate": 9.30882006361163e-06, "loss": 0.3687, "step": 903 }, { "epoch": 0.7581772435001398, "grad_norm": 0.4032425320639307, "learning_rate": 9.306341690990484e-06, "loss": 0.3579, "step": 904 }, { "epoch": 0.7590159351411797, "grad_norm": 0.462724383504663, "learning_rate": 9.3038592140975e-06, "loss": 0.3691, "step": 905 }, { "epoch": 0.7598546267822197, "grad_norm": 0.4558059664014471, "learning_rate": 9.301372635298665e-06, "loss": 0.3766, "step": 906 }, { "epoch": 0.7606933184232597, "grad_norm": 0.4399587679357466, "learning_rate": 9.298881956963881e-06, "loss": 0.3465, "step": 907 }, { "epoch": 0.7615320100642997, "grad_norm": 0.47405667971091736, "learning_rate": 9.296387181466952e-06, "loss": 0.3705, "step": 908 }, { "epoch": 0.7623707017053397, "grad_norm": 0.4658895197267236, "learning_rate": 9.293888311185593e-06, "loss": 0.3792, "step": 909 }, { "epoch": 0.7632093933463796, "grad_norm": 0.4327618202235587, "learning_rate": 9.291385348501414e-06, "loss": 0.3799, "step": 910 }, { "epoch": 0.7640480849874196, "grad_norm": 0.4789964654874952, "learning_rate": 9.28887829579993e-06, "loss": 0.3727, "step": 911 }, { "epoch": 0.7648867766284596, "grad_norm": 0.4703457008324349, "learning_rate": 9.286367155470552e-06, "loss": 0.378, "step": 912 }, { "epoch": 0.7657254682694996, "grad_norm": 0.45066435289245826, "learning_rate": 9.283851929906594e-06, "loss": 0.3767, "step": 913 }, { "epoch": 0.7665641599105396, "grad_norm": 0.48492348558838655, "learning_rate": 9.28133262150525e-06, "loss": 0.3723, "step": 914 }, { "epoch": 0.7674028515515795, "grad_norm": 0.39406238310791303, "learning_rate": 9.278809232667617e-06, "loss": 0.3772, "step": 915 }, { "epoch": 0.7682415431926195, "grad_norm": 0.4430211894028574, "learning_rate": 9.276281765798675e-06, "loss": 0.3628, "step": 916 }, { "epoch": 0.7690802348336595, "grad_norm": 0.4555631012538427, "learning_rate": 9.273750223307296e-06, "loss": 0.3792, "step": 917 }, { "epoch": 0.7699189264746995, "grad_norm": 0.4617537946179272, "learning_rate": 9.271214607606232e-06, "loss": 0.3655, "step": 918 }, { "epoch": 0.7707576181157394, "grad_norm": 0.45653426458583973, "learning_rate": 9.268674921112115e-06, "loss": 0.3641, "step": 919 }, { "epoch": 0.7715963097567794, "grad_norm": 0.4891758514150904, "learning_rate": 9.26613116624546e-06, "loss": 0.3572, "step": 920 }, { "epoch": 0.7724350013978194, "grad_norm": 0.5286822957695599, "learning_rate": 9.263583345430662e-06, "loss": 0.3726, "step": 921 }, { "epoch": 0.7732736930388594, "grad_norm": 0.4193243334078798, "learning_rate": 9.261031461095988e-06, "loss": 0.3599, "step": 922 }, { "epoch": 0.7741123846798994, "grad_norm": 0.4114662587909733, "learning_rate": 9.258475515673576e-06, "loss": 0.3864, "step": 923 }, { "epoch": 0.7749510763209393, "grad_norm": 0.4679757060412885, "learning_rate": 9.25591551159944e-06, "loss": 0.3779, "step": 924 }, { "epoch": 0.7757897679619793, "grad_norm": 0.43097786100131735, "learning_rate": 9.253351451313457e-06, "loss": 0.3527, "step": 925 }, { "epoch": 0.7766284596030193, "grad_norm": 0.428957227690082, "learning_rate": 9.250783337259371e-06, "loss": 0.3818, "step": 926 }, { "epoch": 0.7774671512440593, "grad_norm": 0.4793385530692493, "learning_rate": 9.248211171884793e-06, "loss": 0.3801, "step": 927 }, { "epoch": 0.7783058428850993, "grad_norm": 0.4583801490384083, "learning_rate": 9.245634957641191e-06, "loss": 0.3827, "step": 928 }, { "epoch": 0.7791445345261392, "grad_norm": 0.42383264484392447, "learning_rate": 9.243054696983894e-06, "loss": 0.3716, "step": 929 }, { "epoch": 0.7799832261671792, "grad_norm": 0.47872878983444905, "learning_rate": 9.240470392372086e-06, "loss": 0.4281, "step": 930 }, { "epoch": 0.7808219178082192, "grad_norm": 0.4234122042825348, "learning_rate": 9.237882046268806e-06, "loss": 0.3863, "step": 931 }, { "epoch": 0.7816606094492592, "grad_norm": 0.4592818644159092, "learning_rate": 9.235289661140946e-06, "loss": 0.3494, "step": 932 }, { "epoch": 0.7824993010902991, "grad_norm": 0.46285660901401693, "learning_rate": 9.23269323945925e-06, "loss": 0.3567, "step": 933 }, { "epoch": 0.7833379927313391, "grad_norm": 0.4349447285587615, "learning_rate": 9.2300927836983e-06, "loss": 0.3621, "step": 934 }, { "epoch": 0.7841766843723791, "grad_norm": 0.47607459036965066, "learning_rate": 9.227488296336531e-06, "loss": 0.3796, "step": 935 }, { "epoch": 0.7850153760134191, "grad_norm": 0.4044449274141869, "learning_rate": 9.224879779856219e-06, "loss": 0.3585, "step": 936 }, { "epoch": 0.7858540676544591, "grad_norm": 0.44183270150609905, "learning_rate": 9.222267236743475e-06, "loss": 0.3848, "step": 937 }, { "epoch": 0.786692759295499, "grad_norm": 0.4228333386598195, "learning_rate": 9.219650669488259e-06, "loss": 0.4012, "step": 938 }, { "epoch": 0.787531450936539, "grad_norm": 0.4051649953025227, "learning_rate": 9.217030080584353e-06, "loss": 0.3868, "step": 939 }, { "epoch": 0.788370142577579, "grad_norm": 0.41855567033048596, "learning_rate": 9.21440547252938e-06, "loss": 0.3619, "step": 940 }, { "epoch": 0.789208834218619, "grad_norm": 0.3924774393877832, "learning_rate": 9.21177684782479e-06, "loss": 0.3785, "step": 941 }, { "epoch": 0.790047525859659, "grad_norm": 0.40728507594392765, "learning_rate": 9.209144208975866e-06, "loss": 0.3791, "step": 942 }, { "epoch": 0.7908862175006989, "grad_norm": 0.41191231572456666, "learning_rate": 9.206507558491709e-06, "loss": 0.3624, "step": 943 }, { "epoch": 0.7917249091417389, "grad_norm": 0.4693453839674, "learning_rate": 9.203866898885252e-06, "loss": 0.3588, "step": 944 }, { "epoch": 0.7925636007827789, "grad_norm": 0.41231817905507767, "learning_rate": 9.201222232673242e-06, "loss": 0.3635, "step": 945 }, { "epoch": 0.7934022924238189, "grad_norm": 0.4443675847738243, "learning_rate": 9.198573562376248e-06, "loss": 0.3847, "step": 946 }, { "epoch": 0.7942409840648588, "grad_norm": 0.3830270025589073, "learning_rate": 9.195920890518657e-06, "loss": 0.3461, "step": 947 }, { "epoch": 0.7950796757058988, "grad_norm": 0.46794719776975496, "learning_rate": 9.193264219628664e-06, "loss": 0.3818, "step": 948 }, { "epoch": 0.7959183673469388, "grad_norm": 0.4168964941766479, "learning_rate": 9.190603552238281e-06, "loss": 0.3752, "step": 949 }, { "epoch": 0.7967570589879788, "grad_norm": 0.3881122267553904, "learning_rate": 9.187938890883327e-06, "loss": 0.3436, "step": 950 }, { "epoch": 0.7975957506290188, "grad_norm": 0.49531018542394967, "learning_rate": 9.185270238103426e-06, "loss": 0.3582, "step": 951 }, { "epoch": 0.7984344422700587, "grad_norm": 0.4462234027115143, "learning_rate": 9.182597596442008e-06, "loss": 0.3673, "step": 952 }, { "epoch": 0.7992731339110987, "grad_norm": 0.45571424696644136, "learning_rate": 9.179920968446306e-06, "loss": 0.3862, "step": 953 }, { "epoch": 0.8001118255521387, "grad_norm": 0.42635193479080113, "learning_rate": 9.177240356667348e-06, "loss": 0.3628, "step": 954 }, { "epoch": 0.8009505171931787, "grad_norm": 0.43050626731721614, "learning_rate": 9.17455576365996e-06, "loss": 0.371, "step": 955 }, { "epoch": 0.8017892088342187, "grad_norm": 0.38919721433997895, "learning_rate": 9.171867191982768e-06, "loss": 0.3467, "step": 956 }, { "epoch": 0.8026279004752586, "grad_norm": 0.49566027645559796, "learning_rate": 9.169174644198181e-06, "loss": 0.3547, "step": 957 }, { "epoch": 0.8034665921162986, "grad_norm": 0.43062722526709873, "learning_rate": 9.16647812287241e-06, "loss": 0.3979, "step": 958 }, { "epoch": 0.8043052837573386, "grad_norm": 0.37445387760508897, "learning_rate": 9.163777630575435e-06, "loss": 0.3403, "step": 959 }, { "epoch": 0.8051439753983786, "grad_norm": 0.4377298450962926, "learning_rate": 9.161073169881039e-06, "loss": 0.3713, "step": 960 }, { "epoch": 0.8059826670394185, "grad_norm": 0.42566955895845954, "learning_rate": 9.158364743366775e-06, "loss": 0.3532, "step": 961 }, { "epoch": 0.8068213586804585, "grad_norm": 0.3837009212708552, "learning_rate": 9.155652353613982e-06, "loss": 0.361, "step": 962 }, { "epoch": 0.8076600503214985, "grad_norm": 0.43963842505793105, "learning_rate": 9.152936003207773e-06, "loss": 0.3829, "step": 963 }, { "epoch": 0.8084987419625385, "grad_norm": 0.41881300468309035, "learning_rate": 9.150215694737039e-06, "loss": 0.3737, "step": 964 }, { "epoch": 0.8093374336035785, "grad_norm": 0.45768523364789215, "learning_rate": 9.147491430794437e-06, "loss": 0.401, "step": 965 }, { "epoch": 0.8101761252446184, "grad_norm": 0.41975407533099857, "learning_rate": 9.144763213976402e-06, "loss": 0.3858, "step": 966 }, { "epoch": 0.8110148168856584, "grad_norm": 0.3529017853535833, "learning_rate": 9.142031046883133e-06, "loss": 0.3279, "step": 967 }, { "epoch": 0.8118535085266984, "grad_norm": 0.425214019494599, "learning_rate": 9.13929493211859e-06, "loss": 0.3673, "step": 968 }, { "epoch": 0.8126922001677384, "grad_norm": 0.44375269412905394, "learning_rate": 9.1365548722905e-06, "loss": 0.3647, "step": 969 }, { "epoch": 0.8135308918087784, "grad_norm": 0.4272702356445519, "learning_rate": 9.133810870010352e-06, "loss": 0.367, "step": 970 }, { "epoch": 0.8143695834498182, "grad_norm": 0.37493727750937367, "learning_rate": 9.131062927893383e-06, "loss": 0.3459, "step": 971 }, { "epoch": 0.8152082750908582, "grad_norm": 0.4024344716625137, "learning_rate": 9.128311048558599e-06, "loss": 0.3629, "step": 972 }, { "epoch": 0.8160469667318982, "grad_norm": 0.43954677719295515, "learning_rate": 9.125555234628746e-06, "loss": 0.3849, "step": 973 }, { "epoch": 0.8168856583729383, "grad_norm": 0.4544546348166353, "learning_rate": 9.122795488730324e-06, "loss": 0.3706, "step": 974 }, { "epoch": 0.8177243500139781, "grad_norm": 0.4030223848099077, "learning_rate": 9.120031813493586e-06, "loss": 0.3716, "step": 975 }, { "epoch": 0.8185630416550181, "grad_norm": 0.5132846080318186, "learning_rate": 9.11726421155252e-06, "loss": 0.355, "step": 976 }, { "epoch": 0.8194017332960581, "grad_norm": 0.44841883416713607, "learning_rate": 9.114492685544863e-06, "loss": 0.3528, "step": 977 }, { "epoch": 0.8202404249370981, "grad_norm": 0.41993956359481477, "learning_rate": 9.111717238112089e-06, "loss": 0.38, "step": 978 }, { "epoch": 0.8210791165781381, "grad_norm": 0.48432971071601405, "learning_rate": 9.108937871899417e-06, "loss": 0.3693, "step": 979 }, { "epoch": 0.821917808219178, "grad_norm": 0.44023595966953616, "learning_rate": 9.106154589555789e-06, "loss": 0.3468, "step": 980 }, { "epoch": 0.822756499860218, "grad_norm": 0.4921689476450014, "learning_rate": 9.103367393733887e-06, "loss": 0.3708, "step": 981 }, { "epoch": 0.823595191501258, "grad_norm": 0.3983831610809587, "learning_rate": 9.100576287090122e-06, "loss": 0.3641, "step": 982 }, { "epoch": 0.824433883142298, "grad_norm": 0.38300345527276747, "learning_rate": 9.097781272284629e-06, "loss": 0.3683, "step": 983 }, { "epoch": 0.825272574783338, "grad_norm": 0.40007098659657886, "learning_rate": 9.094982351981273e-06, "loss": 0.3577, "step": 984 }, { "epoch": 0.8261112664243779, "grad_norm": 0.48953404192542066, "learning_rate": 9.092179528847636e-06, "loss": 0.3701, "step": 985 }, { "epoch": 0.8269499580654179, "grad_norm": 0.37706124617390624, "learning_rate": 9.089372805555023e-06, "loss": 0.3559, "step": 986 }, { "epoch": 0.8277886497064579, "grad_norm": 0.4674096003079363, "learning_rate": 9.086562184778455e-06, "loss": 0.3866, "step": 987 }, { "epoch": 0.8286273413474979, "grad_norm": 0.4444326313865554, "learning_rate": 9.083747669196668e-06, "loss": 0.3405, "step": 988 }, { "epoch": 0.8294660329885378, "grad_norm": 0.41828476736079745, "learning_rate": 9.080929261492109e-06, "loss": 0.3597, "step": 989 }, { "epoch": 0.8303047246295778, "grad_norm": 0.38395435952244417, "learning_rate": 9.078106964350939e-06, "loss": 0.3441, "step": 990 }, { "epoch": 0.8311434162706178, "grad_norm": 0.534899790012475, "learning_rate": 9.075280780463016e-06, "loss": 0.3778, "step": 991 }, { "epoch": 0.8319821079116578, "grad_norm": 0.4223439411314482, "learning_rate": 9.072450712521914e-06, "loss": 0.3993, "step": 992 }, { "epoch": 0.8328207995526978, "grad_norm": 0.427655933015535, "learning_rate": 9.069616763224903e-06, "loss": 0.3534, "step": 993 }, { "epoch": 0.8336594911937377, "grad_norm": 0.4536112840715363, "learning_rate": 9.06677893527295e-06, "loss": 0.3884, "step": 994 }, { "epoch": 0.8344981828347777, "grad_norm": 0.4299536234069838, "learning_rate": 9.063937231370722e-06, "loss": 0.3507, "step": 995 }, { "epoch": 0.8353368744758177, "grad_norm": 0.42316988961177165, "learning_rate": 9.061091654226579e-06, "loss": 0.3659, "step": 996 }, { "epoch": 0.8361755661168577, "grad_norm": 0.4274547156063208, "learning_rate": 9.058242206552575e-06, "loss": 0.3831, "step": 997 }, { "epoch": 0.8370142577578977, "grad_norm": 0.42413102517754875, "learning_rate": 9.05538889106445e-06, "loss": 0.3502, "step": 998 }, { "epoch": 0.8378529493989376, "grad_norm": 0.45811086683257074, "learning_rate": 9.052531710481629e-06, "loss": 0.3749, "step": 999 }, { "epoch": 0.8386916410399776, "grad_norm": 0.39669483988487625, "learning_rate": 9.049670667527227e-06, "loss": 0.3721, "step": 1000 }, { "epoch": 0.8395303326810176, "grad_norm": 0.3910506854254016, "learning_rate": 9.046805764928031e-06, "loss": 0.3638, "step": 1001 }, { "epoch": 0.8403690243220576, "grad_norm": 0.43494776443699473, "learning_rate": 9.043937005414515e-06, "loss": 0.37, "step": 1002 }, { "epoch": 0.8412077159630975, "grad_norm": 0.40440367902767543, "learning_rate": 9.041064391720826e-06, "loss": 0.3643, "step": 1003 }, { "epoch": 0.8420464076041375, "grad_norm": 0.4348268078409116, "learning_rate": 9.038187926584781e-06, "loss": 0.3956, "step": 1004 }, { "epoch": 0.8428850992451775, "grad_norm": 0.45536332070757096, "learning_rate": 9.035307612747873e-06, "loss": 0.3779, "step": 1005 }, { "epoch": 0.8437237908862175, "grad_norm": 0.4506135647572574, "learning_rate": 9.03242345295526e-06, "loss": 0.3636, "step": 1006 }, { "epoch": 0.8445624825272575, "grad_norm": 0.48968975699291084, "learning_rate": 9.029535449955765e-06, "loss": 0.3812, "step": 1007 }, { "epoch": 0.8454011741682974, "grad_norm": 0.45880191879751914, "learning_rate": 9.026643606501876e-06, "loss": 0.373, "step": 1008 }, { "epoch": 0.8462398658093374, "grad_norm": 0.4217521935544451, "learning_rate": 9.023747925349737e-06, "loss": 0.379, "step": 1009 }, { "epoch": 0.8470785574503774, "grad_norm": 0.49172941449589613, "learning_rate": 9.020848409259157e-06, "loss": 0.3809, "step": 1010 }, { "epoch": 0.8479172490914174, "grad_norm": 0.49894105288559737, "learning_rate": 9.017945060993596e-06, "loss": 0.3411, "step": 1011 }, { "epoch": 0.8487559407324574, "grad_norm": 0.43432319337270353, "learning_rate": 9.015037883320163e-06, "loss": 0.3701, "step": 1012 }, { "epoch": 0.8495946323734973, "grad_norm": 0.4464188767921303, "learning_rate": 9.012126879009621e-06, "loss": 0.3879, "step": 1013 }, { "epoch": 0.8504333240145373, "grad_norm": 0.4401394895300372, "learning_rate": 9.009212050836381e-06, "loss": 0.3746, "step": 1014 }, { "epoch": 0.8512720156555773, "grad_norm": 0.43718968072166187, "learning_rate": 9.006293401578494e-06, "loss": 0.3853, "step": 1015 }, { "epoch": 0.8521107072966173, "grad_norm": 0.4426237488218515, "learning_rate": 9.003370934017656e-06, "loss": 0.339, "step": 1016 }, { "epoch": 0.8529493989376572, "grad_norm": 0.4268592724294042, "learning_rate": 9.000444650939202e-06, "loss": 0.3675, "step": 1017 }, { "epoch": 0.8537880905786972, "grad_norm": 0.4264166374598749, "learning_rate": 8.997514555132103e-06, "loss": 0.3469, "step": 1018 }, { "epoch": 0.8546267822197372, "grad_norm": 0.5248552909651146, "learning_rate": 8.994580649388962e-06, "loss": 0.3757, "step": 1019 }, { "epoch": 0.8554654738607772, "grad_norm": 0.43667936214296105, "learning_rate": 8.991642936506014e-06, "loss": 0.3692, "step": 1020 }, { "epoch": 0.8563041655018172, "grad_norm": 0.4644986117814808, "learning_rate": 8.988701419283127e-06, "loss": 0.3777, "step": 1021 }, { "epoch": 0.8571428571428571, "grad_norm": 0.4418183959129989, "learning_rate": 8.985756100523787e-06, "loss": 0.3694, "step": 1022 }, { "epoch": 0.8579815487838971, "grad_norm": 0.4368564678717103, "learning_rate": 8.98280698303511e-06, "loss": 0.3641, "step": 1023 }, { "epoch": 0.8588202404249371, "grad_norm": 0.42059744988988934, "learning_rate": 8.979854069627829e-06, "loss": 0.3634, "step": 1024 }, { "epoch": 0.8596589320659771, "grad_norm": 0.4402160840093317, "learning_rate": 8.976897363116296e-06, "loss": 0.3809, "step": 1025 }, { "epoch": 0.8604976237070171, "grad_norm": 0.4311227458549869, "learning_rate": 8.973936866318477e-06, "loss": 0.3836, "step": 1026 }, { "epoch": 0.861336315348057, "grad_norm": 0.40793736051021584, "learning_rate": 8.970972582055952e-06, "loss": 0.3745, "step": 1027 }, { "epoch": 0.862175006989097, "grad_norm": 0.4072523915741592, "learning_rate": 8.968004513153907e-06, "loss": 0.3508, "step": 1028 }, { "epoch": 0.863013698630137, "grad_norm": 0.4042775336011296, "learning_rate": 8.965032662441141e-06, "loss": 0.3685, "step": 1029 }, { "epoch": 0.863852390271177, "grad_norm": 0.3496315696403938, "learning_rate": 8.962057032750054e-06, "loss": 0.3496, "step": 1030 }, { "epoch": 0.864691081912217, "grad_norm": 0.4247536597278125, "learning_rate": 8.959077626916644e-06, "loss": 0.3536, "step": 1031 }, { "epoch": 0.8655297735532569, "grad_norm": 0.4109657665703236, "learning_rate": 8.956094447780518e-06, "loss": 0.3668, "step": 1032 }, { "epoch": 0.8663684651942969, "grad_norm": 0.38511193852835235, "learning_rate": 8.953107498184869e-06, "loss": 0.3575, "step": 1033 }, { "epoch": 0.8672071568353369, "grad_norm": 0.43156503956503883, "learning_rate": 8.950116780976488e-06, "loss": 0.3637, "step": 1034 }, { "epoch": 0.8680458484763769, "grad_norm": 0.41681146949522285, "learning_rate": 8.947122299005757e-06, "loss": 0.346, "step": 1035 }, { "epoch": 0.8688845401174168, "grad_norm": 0.3798258073200929, "learning_rate": 8.944124055126646e-06, "loss": 0.3733, "step": 1036 }, { "epoch": 0.8697232317584568, "grad_norm": 0.39590142399047307, "learning_rate": 8.94112205219671e-06, "loss": 0.3677, "step": 1037 }, { "epoch": 0.8705619233994968, "grad_norm": 0.4547416976972712, "learning_rate": 8.938116293077085e-06, "loss": 0.3634, "step": 1038 }, { "epoch": 0.8714006150405368, "grad_norm": 0.4296655343082293, "learning_rate": 8.93510678063249e-06, "loss": 0.3813, "step": 1039 }, { "epoch": 0.8722393066815768, "grad_norm": 0.4105304215592246, "learning_rate": 8.93209351773122e-06, "loss": 0.3482, "step": 1040 }, { "epoch": 0.8730779983226167, "grad_norm": 0.4642786068007437, "learning_rate": 8.929076507245142e-06, "loss": 0.3779, "step": 1041 }, { "epoch": 0.8739166899636567, "grad_norm": 0.37177566078316965, "learning_rate": 8.926055752049698e-06, "loss": 0.3764, "step": 1042 }, { "epoch": 0.8747553816046967, "grad_norm": 0.4108247735343632, "learning_rate": 8.9230312550239e-06, "loss": 0.3597, "step": 1043 }, { "epoch": 0.8755940732457367, "grad_norm": 0.43313543507515173, "learning_rate": 8.920003019050322e-06, "loss": 0.3658, "step": 1044 }, { "epoch": 0.8764327648867767, "grad_norm": 0.4124957542316831, "learning_rate": 8.916971047015104e-06, "loss": 0.3509, "step": 1045 }, { "epoch": 0.8772714565278166, "grad_norm": 0.45163019951292904, "learning_rate": 8.913935341807947e-06, "loss": 0.3618, "step": 1046 }, { "epoch": 0.8781101481688566, "grad_norm": 0.39385059216382823, "learning_rate": 8.910895906322109e-06, "loss": 0.3597, "step": 1047 }, { "epoch": 0.8789488398098966, "grad_norm": 0.4557147758080472, "learning_rate": 8.907852743454404e-06, "loss": 0.3707, "step": 1048 }, { "epoch": 0.8797875314509366, "grad_norm": 0.39436337578967945, "learning_rate": 8.904805856105197e-06, "loss": 0.3584, "step": 1049 }, { "epoch": 0.8806262230919765, "grad_norm": 0.3975380259518863, "learning_rate": 8.901755247178405e-06, "loss": 0.3801, "step": 1050 }, { "epoch": 0.8814649147330165, "grad_norm": 0.4349153942607171, "learning_rate": 8.89870091958149e-06, "loss": 0.3723, "step": 1051 }, { "epoch": 0.8823036063740565, "grad_norm": 0.4102674087182801, "learning_rate": 8.895642876225462e-06, "loss": 0.3639, "step": 1052 }, { "epoch": 0.8831422980150965, "grad_norm": 0.3953368539533199, "learning_rate": 8.892581120024865e-06, "loss": 0.365, "step": 1053 }, { "epoch": 0.8839809896561365, "grad_norm": 0.3946162268128091, "learning_rate": 8.889515653897788e-06, "loss": 0.3485, "step": 1054 }, { "epoch": 0.8848196812971764, "grad_norm": 0.47138037248099435, "learning_rate": 8.886446480765854e-06, "loss": 0.3643, "step": 1055 }, { "epoch": 0.8856583729382164, "grad_norm": 0.4256720811674416, "learning_rate": 8.88337360355422e-06, "loss": 0.3912, "step": 1056 }, { "epoch": 0.8864970645792564, "grad_norm": 0.44810000735944855, "learning_rate": 8.880297025191568e-06, "loss": 0.3824, "step": 1057 }, { "epoch": 0.8873357562202964, "grad_norm": 0.37666045995061553, "learning_rate": 8.877216748610117e-06, "loss": 0.3521, "step": 1058 }, { "epoch": 0.8881744478613364, "grad_norm": 0.48829935138147096, "learning_rate": 8.8741327767456e-06, "loss": 0.3572, "step": 1059 }, { "epoch": 0.8890131395023763, "grad_norm": 0.46089783394653533, "learning_rate": 8.871045112537284e-06, "loss": 0.3876, "step": 1060 }, { "epoch": 0.8898518311434163, "grad_norm": 0.4068545081149648, "learning_rate": 8.867953758927943e-06, "loss": 0.3632, "step": 1061 }, { "epoch": 0.8906905227844563, "grad_norm": 0.4611704210308861, "learning_rate": 8.864858718863873e-06, "loss": 0.3737, "step": 1062 }, { "epoch": 0.8915292144254963, "grad_norm": 0.449765778923254, "learning_rate": 8.861759995294885e-06, "loss": 0.3601, "step": 1063 }, { "epoch": 0.8923679060665362, "grad_norm": 0.418012360832309, "learning_rate": 8.858657591174299e-06, "loss": 0.3671, "step": 1064 }, { "epoch": 0.8932065977075762, "grad_norm": 0.42931326493861105, "learning_rate": 8.85555150945894e-06, "loss": 0.3514, "step": 1065 }, { "epoch": 0.8940452893486162, "grad_norm": 0.4295013311851086, "learning_rate": 8.85244175310914e-06, "loss": 0.3542, "step": 1066 }, { "epoch": 0.8948839809896562, "grad_norm": 0.4176469878468258, "learning_rate": 8.849328325088738e-06, "loss": 0.3501, "step": 1067 }, { "epoch": 0.8957226726306962, "grad_norm": 0.40274589447093173, "learning_rate": 8.846211228365067e-06, "loss": 0.3657, "step": 1068 }, { "epoch": 0.896561364271736, "grad_norm": 0.398680058205735, "learning_rate": 8.843090465908954e-06, "loss": 0.3799, "step": 1069 }, { "epoch": 0.897400055912776, "grad_norm": 0.3748366866675402, "learning_rate": 8.83996604069473e-06, "loss": 0.3597, "step": 1070 }, { "epoch": 0.898238747553816, "grad_norm": 0.3766585806439001, "learning_rate": 8.836837955700204e-06, "loss": 0.3458, "step": 1071 }, { "epoch": 0.899077439194856, "grad_norm": 0.3843500272695824, "learning_rate": 8.833706213906681e-06, "loss": 0.3676, "step": 1072 }, { "epoch": 0.899916130835896, "grad_norm": 0.440963407515374, "learning_rate": 8.830570818298953e-06, "loss": 0.3892, "step": 1073 }, { "epoch": 0.900754822476936, "grad_norm": 0.3945030052933741, "learning_rate": 8.827431771865288e-06, "loss": 0.3547, "step": 1074 }, { "epoch": 0.901593514117976, "grad_norm": 0.38182087594943653, "learning_rate": 8.824289077597437e-06, "loss": 0.3626, "step": 1075 }, { "epoch": 0.902432205759016, "grad_norm": 0.4287519595235546, "learning_rate": 8.821142738490626e-06, "loss": 0.3649, "step": 1076 }, { "epoch": 0.903270897400056, "grad_norm": 0.4096582848522866, "learning_rate": 8.817992757543558e-06, "loss": 0.3691, "step": 1077 }, { "epoch": 0.9041095890410958, "grad_norm": 0.3954005199916545, "learning_rate": 8.814839137758405e-06, "loss": 0.3777, "step": 1078 }, { "epoch": 0.9049482806821358, "grad_norm": 0.4432486862736794, "learning_rate": 8.811681882140807e-06, "loss": 0.3654, "step": 1079 }, { "epoch": 0.9057869723231758, "grad_norm": 0.3802178646837509, "learning_rate": 8.808520993699865e-06, "loss": 0.3789, "step": 1080 }, { "epoch": 0.9066256639642158, "grad_norm": 0.40722428923543696, "learning_rate": 8.805356475448152e-06, "loss": 0.3548, "step": 1081 }, { "epoch": 0.9074643556052558, "grad_norm": 0.4612074470181902, "learning_rate": 8.802188330401694e-06, "loss": 0.3691, "step": 1082 }, { "epoch": 0.9083030472462957, "grad_norm": 0.41296587117259864, "learning_rate": 8.799016561579971e-06, "loss": 0.3713, "step": 1083 }, { "epoch": 0.9091417388873357, "grad_norm": 0.4334153674046401, "learning_rate": 8.795841172005925e-06, "loss": 0.3593, "step": 1084 }, { "epoch": 0.9099804305283757, "grad_norm": 0.37350223530898535, "learning_rate": 8.792662164705944e-06, "loss": 0.3623, "step": 1085 }, { "epoch": 0.9108191221694157, "grad_norm": 0.4318538610478218, "learning_rate": 8.78947954270986e-06, "loss": 0.3643, "step": 1086 }, { "epoch": 0.9116578138104557, "grad_norm": 0.48669837485877643, "learning_rate": 8.786293309050957e-06, "loss": 0.371, "step": 1087 }, { "epoch": 0.9124965054514956, "grad_norm": 0.4222929302363419, "learning_rate": 8.78310346676596e-06, "loss": 0.3591, "step": 1088 }, { "epoch": 0.9133351970925356, "grad_norm": 0.45277244157211227, "learning_rate": 8.779910018895031e-06, "loss": 0.3706, "step": 1089 }, { "epoch": 0.9141738887335756, "grad_norm": 0.40666364370352515, "learning_rate": 8.776712968481766e-06, "loss": 0.3414, "step": 1090 }, { "epoch": 0.9150125803746156, "grad_norm": 0.39562192555152736, "learning_rate": 8.7735123185732e-06, "loss": 0.3467, "step": 1091 }, { "epoch": 0.9158512720156555, "grad_norm": 0.4420858993714182, "learning_rate": 8.7703080722198e-06, "loss": 0.3774, "step": 1092 }, { "epoch": 0.9166899636566955, "grad_norm": 0.41946898250464343, "learning_rate": 8.767100232475451e-06, "loss": 0.3676, "step": 1093 }, { "epoch": 0.9175286552977355, "grad_norm": 0.39996834479409665, "learning_rate": 8.763888802397472e-06, "loss": 0.3716, "step": 1094 }, { "epoch": 0.9183673469387755, "grad_norm": 0.4246607828132671, "learning_rate": 8.760673785046599e-06, "loss": 0.3565, "step": 1095 }, { "epoch": 0.9192060385798155, "grad_norm": 0.41650926633946794, "learning_rate": 8.75745518348699e-06, "loss": 0.3854, "step": 1096 }, { "epoch": 0.9200447302208554, "grad_norm": 0.44817244885649293, "learning_rate": 8.754233000786218e-06, "loss": 0.3755, "step": 1097 }, { "epoch": 0.9208834218618954, "grad_norm": 0.4065595146237896, "learning_rate": 8.751007240015267e-06, "loss": 0.387, "step": 1098 }, { "epoch": 0.9217221135029354, "grad_norm": 0.3702982979399014, "learning_rate": 8.747777904248534e-06, "loss": 0.3543, "step": 1099 }, { "epoch": 0.9225608051439754, "grad_norm": 0.5344306531835493, "learning_rate": 8.74454499656382e-06, "loss": 0.3745, "step": 1100 }, { "epoch": 0.9233994967850154, "grad_norm": 0.3864673510521973, "learning_rate": 8.741308520042336e-06, "loss": 0.3657, "step": 1101 }, { "epoch": 0.9242381884260553, "grad_norm": 0.5022890465911786, "learning_rate": 8.73806847776869e-06, "loss": 0.3454, "step": 1102 }, { "epoch": 0.9250768800670953, "grad_norm": 0.45133215614619615, "learning_rate": 8.734824872830884e-06, "loss": 0.3695, "step": 1103 }, { "epoch": 0.9259155717081353, "grad_norm": 0.3775755350887701, "learning_rate": 8.73157770832033e-06, "loss": 0.3872, "step": 1104 }, { "epoch": 0.9267542633491753, "grad_norm": 0.49132976580499466, "learning_rate": 8.728326987331814e-06, "loss": 0.3552, "step": 1105 }, { "epoch": 0.9275929549902152, "grad_norm": 0.42331037154444384, "learning_rate": 8.725072712963528e-06, "loss": 0.3653, "step": 1106 }, { "epoch": 0.9284316466312552, "grad_norm": 0.4830120171130099, "learning_rate": 8.72181488831704e-06, "loss": 0.4027, "step": 1107 }, { "epoch": 0.9292703382722952, "grad_norm": 0.48701842557757824, "learning_rate": 8.718553516497304e-06, "loss": 0.3743, "step": 1108 }, { "epoch": 0.9301090299133352, "grad_norm": 0.40314415868199904, "learning_rate": 8.715288600612658e-06, "loss": 0.3536, "step": 1109 }, { "epoch": 0.9309477215543752, "grad_norm": 0.45533225751418327, "learning_rate": 8.712020143774815e-06, "loss": 0.3698, "step": 1110 }, { "epoch": 0.9317864131954151, "grad_norm": 0.45738683057629187, "learning_rate": 8.708748149098866e-06, "loss": 0.3834, "step": 1111 }, { "epoch": 0.9326251048364551, "grad_norm": 0.39420528353785966, "learning_rate": 8.705472619703267e-06, "loss": 0.3761, "step": 1112 }, { "epoch": 0.9334637964774951, "grad_norm": 0.41213983457888725, "learning_rate": 8.702193558709851e-06, "loss": 0.3744, "step": 1113 }, { "epoch": 0.9343024881185351, "grad_norm": 0.46105348691240217, "learning_rate": 8.698910969243808e-06, "loss": 0.381, "step": 1114 }, { "epoch": 0.9351411797595751, "grad_norm": 0.4935876256126199, "learning_rate": 8.695624854433702e-06, "loss": 0.3472, "step": 1115 }, { "epoch": 0.935979871400615, "grad_norm": 0.4141021154283585, "learning_rate": 8.692335217411447e-06, "loss": 0.3913, "step": 1116 }, { "epoch": 0.936818563041655, "grad_norm": 0.4168955405169608, "learning_rate": 8.68904206131232e-06, "loss": 0.3583, "step": 1117 }, { "epoch": 0.937657254682695, "grad_norm": 0.39537540664820486, "learning_rate": 8.685745389274947e-06, "loss": 0.3318, "step": 1118 }, { "epoch": 0.938495946323735, "grad_norm": 0.4642116892289507, "learning_rate": 8.68244520444131e-06, "loss": 0.4156, "step": 1119 }, { "epoch": 0.9393346379647749, "grad_norm": 0.4424913155211944, "learning_rate": 8.679141509956737e-06, "loss": 0.3893, "step": 1120 }, { "epoch": 0.9401733296058149, "grad_norm": 0.3958404991547313, "learning_rate": 8.675834308969896e-06, "loss": 0.3623, "step": 1121 }, { "epoch": 0.9410120212468549, "grad_norm": 0.39712192558485, "learning_rate": 8.672523604632809e-06, "loss": 0.3975, "step": 1122 }, { "epoch": 0.9418507128878949, "grad_norm": 0.4311048441451236, "learning_rate": 8.669209400100825e-06, "loss": 0.3649, "step": 1123 }, { "epoch": 0.9426894045289349, "grad_norm": 0.40224158351429473, "learning_rate": 8.665891698532633e-06, "loss": 0.3746, "step": 1124 }, { "epoch": 0.9435280961699748, "grad_norm": 0.3581590501705142, "learning_rate": 8.662570503090257e-06, "loss": 0.3568, "step": 1125 }, { "epoch": 0.9443667878110148, "grad_norm": 0.3851260809227509, "learning_rate": 8.65924581693905e-06, "loss": 0.3728, "step": 1126 }, { "epoch": 0.9452054794520548, "grad_norm": 0.4061058857170392, "learning_rate": 8.65591764324769e-06, "loss": 0.3827, "step": 1127 }, { "epoch": 0.9460441710930948, "grad_norm": 0.4232178623261647, "learning_rate": 8.652585985188181e-06, "loss": 0.3652, "step": 1128 }, { "epoch": 0.9468828627341348, "grad_norm": 0.3998720077157625, "learning_rate": 8.649250845935848e-06, "loss": 0.3569, "step": 1129 }, { "epoch": 0.9477215543751747, "grad_norm": 0.40449865694731046, "learning_rate": 8.645912228669328e-06, "loss": 0.3851, "step": 1130 }, { "epoch": 0.9485602460162147, "grad_norm": 0.3989127979775737, "learning_rate": 8.642570136570585e-06, "loss": 0.3746, "step": 1131 }, { "epoch": 0.9493989376572547, "grad_norm": 0.4328712809754003, "learning_rate": 8.639224572824881e-06, "loss": 0.3549, "step": 1132 }, { "epoch": 0.9502376292982947, "grad_norm": 0.44899361713868075, "learning_rate": 8.635875540620798e-06, "loss": 0.3868, "step": 1133 }, { "epoch": 0.9510763209393346, "grad_norm": 0.48019756228791577, "learning_rate": 8.632523043150213e-06, "loss": 0.3841, "step": 1134 }, { "epoch": 0.9519150125803746, "grad_norm": 0.42019511827680645, "learning_rate": 8.62916708360832e-06, "loss": 0.361, "step": 1135 }, { "epoch": 0.9527537042214146, "grad_norm": 0.41755885688193395, "learning_rate": 8.625807665193597e-06, "loss": 0.3598, "step": 1136 }, { "epoch": 0.9535923958624546, "grad_norm": 0.44281889101392874, "learning_rate": 8.62244479110783e-06, "loss": 0.3475, "step": 1137 }, { "epoch": 0.9544310875034946, "grad_norm": 0.4592722463000471, "learning_rate": 8.619078464556092e-06, "loss": 0.3956, "step": 1138 }, { "epoch": 0.9552697791445345, "grad_norm": 0.3907013592536409, "learning_rate": 8.615708688746752e-06, "loss": 0.3563, "step": 1139 }, { "epoch": 0.9561084707855745, "grad_norm": 0.45541168251005687, "learning_rate": 8.612335466891457e-06, "loss": 0.4076, "step": 1140 }, { "epoch": 0.9569471624266145, "grad_norm": 0.42875816875348016, "learning_rate": 8.60895880220515e-06, "loss": 0.3674, "step": 1141 }, { "epoch": 0.9577858540676545, "grad_norm": 0.46706902148951496, "learning_rate": 8.605578697906047e-06, "loss": 0.3699, "step": 1142 }, { "epoch": 0.9586245457086945, "grad_norm": 0.5190735098030757, "learning_rate": 8.602195157215647e-06, "loss": 0.3745, "step": 1143 }, { "epoch": 0.9594632373497344, "grad_norm": 0.44375052406119025, "learning_rate": 8.598808183358723e-06, "loss": 0.3856, "step": 1144 }, { "epoch": 0.9603019289907744, "grad_norm": 0.41774044272678496, "learning_rate": 8.595417779563316e-06, "loss": 0.3609, "step": 1145 }, { "epoch": 0.9611406206318144, "grad_norm": 0.4248779256282544, "learning_rate": 8.592023949060739e-06, "loss": 0.3493, "step": 1146 }, { "epoch": 0.9619793122728544, "grad_norm": 0.43569391770034593, "learning_rate": 8.588626695085574e-06, "loss": 0.3719, "step": 1147 }, { "epoch": 0.9628180039138943, "grad_norm": 0.38644643574595755, "learning_rate": 8.585226020875664e-06, "loss": 0.3765, "step": 1148 }, { "epoch": 0.9636566955549343, "grad_norm": 0.4316998291771266, "learning_rate": 8.581821929672105e-06, "loss": 0.3814, "step": 1149 }, { "epoch": 0.9644953871959743, "grad_norm": 0.43568972109561416, "learning_rate": 8.57841442471926e-06, "loss": 0.3763, "step": 1150 }, { "epoch": 0.9653340788370143, "grad_norm": 0.38289339236160097, "learning_rate": 8.57500350926474e-06, "loss": 0.3485, "step": 1151 }, { "epoch": 0.9661727704780543, "grad_norm": 0.39127208005138925, "learning_rate": 8.571589186559407e-06, "loss": 0.3707, "step": 1152 }, { "epoch": 0.9670114621190942, "grad_norm": 0.4533555312486705, "learning_rate": 8.568171459857371e-06, "loss": 0.3664, "step": 1153 }, { "epoch": 0.9678501537601342, "grad_norm": 0.38584285810243885, "learning_rate": 8.564750332415986e-06, "loss": 0.3682, "step": 1154 }, { "epoch": 0.9686888454011742, "grad_norm": 0.42944093717625875, "learning_rate": 8.561325807495847e-06, "loss": 0.3518, "step": 1155 }, { "epoch": 0.9695275370422142, "grad_norm": 0.39034986192270127, "learning_rate": 8.557897888360788e-06, "loss": 0.3778, "step": 1156 }, { "epoch": 0.9703662286832542, "grad_norm": 0.3692492642306774, "learning_rate": 8.554466578277876e-06, "loss": 0.3615, "step": 1157 }, { "epoch": 0.9712049203242941, "grad_norm": 0.3773869355298152, "learning_rate": 8.551031880517411e-06, "loss": 0.3598, "step": 1158 }, { "epoch": 0.9720436119653341, "grad_norm": 0.3977221665953523, "learning_rate": 8.547593798352924e-06, "loss": 0.3861, "step": 1159 }, { "epoch": 0.9728823036063741, "grad_norm": 0.38281254606708426, "learning_rate": 8.544152335061166e-06, "loss": 0.3719, "step": 1160 }, { "epoch": 0.9737209952474141, "grad_norm": 0.3751644321252312, "learning_rate": 8.540707493922115e-06, "loss": 0.3566, "step": 1161 }, { "epoch": 0.974559686888454, "grad_norm": 0.3987617155941411, "learning_rate": 8.53725927821897e-06, "loss": 0.3648, "step": 1162 }, { "epoch": 0.975398378529494, "grad_norm": 0.4857754028193396, "learning_rate": 8.533807691238139e-06, "loss": 0.3724, "step": 1163 }, { "epoch": 0.976237070170534, "grad_norm": 0.42084517143405, "learning_rate": 8.53035273626925e-06, "loss": 0.3664, "step": 1164 }, { "epoch": 0.977075761811574, "grad_norm": 0.39074098127746176, "learning_rate": 8.526894416605139e-06, "loss": 0.3573, "step": 1165 }, { "epoch": 0.977914453452614, "grad_norm": 0.450168492233764, "learning_rate": 8.523432735541846e-06, "loss": 0.3714, "step": 1166 }, { "epoch": 0.9787531450936539, "grad_norm": 0.4488100296248967, "learning_rate": 8.519967696378616e-06, "loss": 0.3691, "step": 1167 }, { "epoch": 0.9795918367346939, "grad_norm": 0.3867773613711039, "learning_rate": 8.5164993024179e-06, "loss": 0.3596, "step": 1168 }, { "epoch": 0.9804305283757339, "grad_norm": 0.46256804233168136, "learning_rate": 8.513027556965339e-06, "loss": 0.3695, "step": 1169 }, { "epoch": 0.9812692200167739, "grad_norm": 0.47213931593222236, "learning_rate": 8.509552463329771e-06, "loss": 0.348, "step": 1170 }, { "epoch": 0.9821079116578139, "grad_norm": 0.4242093828509999, "learning_rate": 8.506074024823226e-06, "loss": 0.3632, "step": 1171 }, { "epoch": 0.9829466032988537, "grad_norm": 0.4530811792643082, "learning_rate": 8.50259224476092e-06, "loss": 0.3799, "step": 1172 }, { "epoch": 0.9837852949398938, "grad_norm": 0.4500226039059397, "learning_rate": 8.499107126461252e-06, "loss": 0.375, "step": 1173 }, { "epoch": 0.9846239865809338, "grad_norm": 0.42764469617670103, "learning_rate": 8.495618673245811e-06, "loss": 0.3695, "step": 1174 }, { "epoch": 0.9854626782219738, "grad_norm": 0.44890556895952693, "learning_rate": 8.492126888439356e-06, "loss": 0.3781, "step": 1175 }, { "epoch": 0.9863013698630136, "grad_norm": 0.4186026086275175, "learning_rate": 8.488631775369825e-06, "loss": 0.3756, "step": 1176 }, { "epoch": 0.9871400615040536, "grad_norm": 0.3888221144366789, "learning_rate": 8.485133337368325e-06, "loss": 0.3671, "step": 1177 }, { "epoch": 0.9879787531450936, "grad_norm": 0.4205975903178792, "learning_rate": 8.481631577769135e-06, "loss": 0.3777, "step": 1178 }, { "epoch": 0.9888174447861336, "grad_norm": 0.4150532681050207, "learning_rate": 8.4781264999097e-06, "loss": 0.3698, "step": 1179 }, { "epoch": 0.9896561364271736, "grad_norm": 0.3948522882847982, "learning_rate": 8.474618107130626e-06, "loss": 0.3604, "step": 1180 }, { "epoch": 0.9904948280682135, "grad_norm": 0.39359164725221, "learning_rate": 8.471106402775677e-06, "loss": 0.3712, "step": 1181 }, { "epoch": 0.9913335197092535, "grad_norm": 0.4083548141410776, "learning_rate": 8.467591390191779e-06, "loss": 0.3632, "step": 1182 }, { "epoch": 0.9921722113502935, "grad_norm": 0.42643058496746494, "learning_rate": 8.464073072729e-06, "loss": 0.3479, "step": 1183 }, { "epoch": 0.9930109029913335, "grad_norm": 0.42370531106176196, "learning_rate": 8.46055145374057e-06, "loss": 0.3553, "step": 1184 }, { "epoch": 0.9938495946323735, "grad_norm": 0.46995891599809936, "learning_rate": 8.457026536582861e-06, "loss": 0.367, "step": 1185 }, { "epoch": 0.9946882862734134, "grad_norm": 0.41832152028165387, "learning_rate": 8.453498324615383e-06, "loss": 0.3643, "step": 1186 }, { "epoch": 0.9955269779144534, "grad_norm": 0.3875812996969599, "learning_rate": 8.449966821200796e-06, "loss": 0.3521, "step": 1187 }, { "epoch": 0.9963656695554934, "grad_norm": 0.47086767676055585, "learning_rate": 8.44643202970489e-06, "loss": 0.3661, "step": 1188 }, { "epoch": 0.9972043611965334, "grad_norm": 0.4234823280164967, "learning_rate": 8.442893953496587e-06, "loss": 0.3544, "step": 1189 }, { "epoch": 0.9980430528375733, "grad_norm": 0.416213494431186, "learning_rate": 8.439352595947948e-06, "loss": 0.3903, "step": 1190 }, { "epoch": 0.9988817444786133, "grad_norm": 0.42489253322510717, "learning_rate": 8.435807960434158e-06, "loss": 0.383, "step": 1191 }, { "epoch": 0.9997204361196533, "grad_norm": 0.41491782500251617, "learning_rate": 8.432260050333518e-06, "loss": 0.3701, "step": 1192 }, { "epoch": 1.0005591277606933, "grad_norm": 0.7814950893808835, "learning_rate": 8.428708869027462e-06, "loss": 0.5899, "step": 1193 }, { "epoch": 1.0013978194017332, "grad_norm": 0.4332993148260096, "learning_rate": 8.425154419900533e-06, "loss": 0.2988, "step": 1194 }, { "epoch": 1.0022365110427733, "grad_norm": 0.4747478543885571, "learning_rate": 8.421596706340398e-06, "loss": 0.342, "step": 1195 }, { "epoch": 1.0030752026838132, "grad_norm": 0.42050041063835214, "learning_rate": 8.418035731737823e-06, "loss": 0.3141, "step": 1196 }, { "epoch": 1.0039138943248531, "grad_norm": 0.5098843323073364, "learning_rate": 8.414471499486692e-06, "loss": 0.3553, "step": 1197 }, { "epoch": 1.0047525859658932, "grad_norm": 0.4985045554264793, "learning_rate": 8.410904012983985e-06, "loss": 0.3553, "step": 1198 }, { "epoch": 1.0055912776069331, "grad_norm": 0.41786269421830996, "learning_rate": 8.407333275629795e-06, "loss": 0.304, "step": 1199 }, { "epoch": 1.0064299692479732, "grad_norm": 0.4591869529034766, "learning_rate": 8.403759290827302e-06, "loss": 0.3298, "step": 1200 }, { "epoch": 1.0072686608890131, "grad_norm": 0.4069957139247414, "learning_rate": 8.400182061982788e-06, "loss": 0.3689, "step": 1201 }, { "epoch": 1.008107352530053, "grad_norm": 0.46519333185435174, "learning_rate": 8.396601592505626e-06, "loss": 0.359, "step": 1202 }, { "epoch": 1.0089460441710931, "grad_norm": 0.41190408011070445, "learning_rate": 8.393017885808275e-06, "loss": 0.3201, "step": 1203 }, { "epoch": 1.009784735812133, "grad_norm": 0.38191303113799463, "learning_rate": 8.389430945306279e-06, "loss": 0.3168, "step": 1204 }, { "epoch": 1.0106234274531731, "grad_norm": 0.4527061437300169, "learning_rate": 8.385840774418266e-06, "loss": 0.3696, "step": 1205 }, { "epoch": 1.011462119094213, "grad_norm": 0.3989620828221027, "learning_rate": 8.382247376565943e-06, "loss": 0.3417, "step": 1206 }, { "epoch": 1.012300810735253, "grad_norm": 0.4152786603575097, "learning_rate": 8.378650755174094e-06, "loss": 0.358, "step": 1207 }, { "epoch": 1.013139502376293, "grad_norm": 0.4048623966749428, "learning_rate": 8.375050913670573e-06, "loss": 0.3594, "step": 1208 }, { "epoch": 1.013978194017333, "grad_norm": 0.4201210628830915, "learning_rate": 8.371447855486302e-06, "loss": 0.3187, "step": 1209 }, { "epoch": 1.014816885658373, "grad_norm": 0.42441409634029487, "learning_rate": 8.367841584055267e-06, "loss": 0.3961, "step": 1210 }, { "epoch": 1.015655577299413, "grad_norm": 0.40584358553321, "learning_rate": 8.364232102814524e-06, "loss": 0.3468, "step": 1211 }, { "epoch": 1.0164942689404528, "grad_norm": 0.371963717189708, "learning_rate": 8.360619415204183e-06, "loss": 0.3227, "step": 1212 }, { "epoch": 1.017332960581493, "grad_norm": 0.40103347406744594, "learning_rate": 8.357003524667411e-06, "loss": 0.3151, "step": 1213 }, { "epoch": 1.0181716522225328, "grad_norm": 0.43759971893346217, "learning_rate": 8.353384434650425e-06, "loss": 0.381, "step": 1214 }, { "epoch": 1.019010343863573, "grad_norm": 0.38384342707542846, "learning_rate": 8.349762148602495e-06, "loss": 0.3256, "step": 1215 }, { "epoch": 1.0198490355046128, "grad_norm": 0.3842121261240159, "learning_rate": 8.346136669975937e-06, "loss": 0.3315, "step": 1216 }, { "epoch": 1.0206877271456527, "grad_norm": 0.4161283543203865, "learning_rate": 8.342508002226105e-06, "loss": 0.323, "step": 1217 }, { "epoch": 1.0215264187866928, "grad_norm": 0.43899753528407714, "learning_rate": 8.338876148811398e-06, "loss": 0.3338, "step": 1218 }, { "epoch": 1.0223651104277327, "grad_norm": 0.38348708256079095, "learning_rate": 8.335241113193251e-06, "loss": 0.3218, "step": 1219 }, { "epoch": 1.0232038020687728, "grad_norm": 0.42196869940957005, "learning_rate": 8.331602898836126e-06, "loss": 0.3474, "step": 1220 }, { "epoch": 1.0240424937098127, "grad_norm": 0.3675263208983364, "learning_rate": 8.327961509207523e-06, "loss": 0.3091, "step": 1221 }, { "epoch": 1.0248811853508526, "grad_norm": 0.41641212924538396, "learning_rate": 8.32431694777796e-06, "loss": 0.3256, "step": 1222 }, { "epoch": 1.0257198769918927, "grad_norm": 0.37725159799813457, "learning_rate": 8.320669218020983e-06, "loss": 0.3417, "step": 1223 }, { "epoch": 1.0265585686329326, "grad_norm": 0.367706080862134, "learning_rate": 8.317018323413157e-06, "loss": 0.3318, "step": 1224 }, { "epoch": 1.0273972602739727, "grad_norm": 0.41830906971884724, "learning_rate": 8.313364267434064e-06, "loss": 0.3171, "step": 1225 }, { "epoch": 1.0282359519150126, "grad_norm": 0.36379846399551313, "learning_rate": 8.309707053566294e-06, "loss": 0.3438, "step": 1226 }, { "epoch": 1.0290746435560525, "grad_norm": 0.41184955799731654, "learning_rate": 8.306046685295454e-06, "loss": 0.3718, "step": 1227 }, { "epoch": 1.0299133351970926, "grad_norm": 0.3895597812374238, "learning_rate": 8.302383166110153e-06, "loss": 0.3214, "step": 1228 }, { "epoch": 1.0307520268381325, "grad_norm": 0.3987582113139142, "learning_rate": 8.298716499502005e-06, "loss": 0.3328, "step": 1229 }, { "epoch": 1.0315907184791724, "grad_norm": 0.3952218325098424, "learning_rate": 8.29504668896562e-06, "loss": 0.3336, "step": 1230 }, { "epoch": 1.0324294101202125, "grad_norm": 0.40730280088650206, "learning_rate": 8.291373737998608e-06, "loss": 0.3413, "step": 1231 }, { "epoch": 1.0332681017612524, "grad_norm": 0.41229059570186005, "learning_rate": 8.287697650101575e-06, "loss": 0.3649, "step": 1232 }, { "epoch": 1.0341067934022925, "grad_norm": 0.39369241586804016, "learning_rate": 8.284018428778108e-06, "loss": 0.3486, "step": 1233 }, { "epoch": 1.0349454850433324, "grad_norm": 0.3881936320290274, "learning_rate": 8.280336077534787e-06, "loss": 0.3276, "step": 1234 }, { "epoch": 1.0357841766843723, "grad_norm": 0.4453014437182983, "learning_rate": 8.276650599881176e-06, "loss": 0.33, "step": 1235 }, { "epoch": 1.0366228683254124, "grad_norm": 0.45229051487450006, "learning_rate": 8.27296199932981e-06, "loss": 0.3407, "step": 1236 }, { "epoch": 1.0374615599664523, "grad_norm": 0.3743266285512833, "learning_rate": 8.269270279396212e-06, "loss": 0.3263, "step": 1237 }, { "epoch": 1.0383002516074924, "grad_norm": 0.4814747774677376, "learning_rate": 8.265575443598872e-06, "loss": 0.3555, "step": 1238 }, { "epoch": 1.0391389432485323, "grad_norm": 0.4702831846087843, "learning_rate": 8.261877495459246e-06, "loss": 0.3365, "step": 1239 }, { "epoch": 1.0399776348895722, "grad_norm": 0.3485217349465621, "learning_rate": 8.258176438501765e-06, "loss": 0.3342, "step": 1240 }, { "epoch": 1.0408163265306123, "grad_norm": 0.4534262084778915, "learning_rate": 8.254472276253816e-06, "loss": 0.3571, "step": 1241 }, { "epoch": 1.0416550181716522, "grad_norm": 0.41255609342674954, "learning_rate": 8.250765012245747e-06, "loss": 0.3241, "step": 1242 }, { "epoch": 1.0424937098126923, "grad_norm": 0.37099523947005575, "learning_rate": 8.247054650010868e-06, "loss": 0.3147, "step": 1243 }, { "epoch": 1.0433324014537322, "grad_norm": 0.41086782564216384, "learning_rate": 8.243341193085431e-06, "loss": 0.3585, "step": 1244 }, { "epoch": 1.044171093094772, "grad_norm": 0.40910340204318335, "learning_rate": 8.239624645008646e-06, "loss": 0.3269, "step": 1245 }, { "epoch": 1.0450097847358122, "grad_norm": 0.4233541528432374, "learning_rate": 8.235905009322667e-06, "loss": 0.3383, "step": 1246 }, { "epoch": 1.045848476376852, "grad_norm": 0.4394289304130736, "learning_rate": 8.232182289572592e-06, "loss": 0.3103, "step": 1247 }, { "epoch": 1.0466871680178922, "grad_norm": 0.38904660617563064, "learning_rate": 8.228456489306452e-06, "loss": 0.3206, "step": 1248 }, { "epoch": 1.047525859658932, "grad_norm": 0.4101974928651451, "learning_rate": 8.224727612075222e-06, "loss": 0.3204, "step": 1249 }, { "epoch": 1.048364551299972, "grad_norm": 0.4290684676172559, "learning_rate": 8.220995661432805e-06, "loss": 0.3101, "step": 1250 }, { "epoch": 1.049203242941012, "grad_norm": 0.39489215949723905, "learning_rate": 8.217260640936035e-06, "loss": 0.3194, "step": 1251 }, { "epoch": 1.050041934582052, "grad_norm": 0.451966349798068, "learning_rate": 8.213522554144672e-06, "loss": 0.3669, "step": 1252 }, { "epoch": 1.0508806262230919, "grad_norm": 0.40444222408134767, "learning_rate": 8.209781404621398e-06, "loss": 0.3536, "step": 1253 }, { "epoch": 1.051719317864132, "grad_norm": 0.40328950157841675, "learning_rate": 8.20603719593181e-06, "loss": 0.3468, "step": 1254 }, { "epoch": 1.0525580095051719, "grad_norm": 0.41031120741853255, "learning_rate": 8.202289931644425e-06, "loss": 0.3217, "step": 1255 }, { "epoch": 1.053396701146212, "grad_norm": 0.41046167120621196, "learning_rate": 8.198539615330675e-06, "loss": 0.301, "step": 1256 }, { "epoch": 1.0542353927872519, "grad_norm": 0.4580939064353939, "learning_rate": 8.194786250564895e-06, "loss": 0.3453, "step": 1257 }, { "epoch": 1.0550740844282918, "grad_norm": 0.39052946376057757, "learning_rate": 8.191029840924328e-06, "loss": 0.3363, "step": 1258 }, { "epoch": 1.0559127760693319, "grad_norm": 0.400166619890808, "learning_rate": 8.187270389989117e-06, "loss": 0.315, "step": 1259 }, { "epoch": 1.0567514677103718, "grad_norm": 0.4643927125529261, "learning_rate": 8.183507901342309e-06, "loss": 0.3758, "step": 1260 }, { "epoch": 1.0575901593514119, "grad_norm": 0.4656838632173292, "learning_rate": 8.17974237856984e-06, "loss": 0.3501, "step": 1261 }, { "epoch": 1.0584288509924518, "grad_norm": 0.3855326194702158, "learning_rate": 8.175973825260538e-06, "loss": 0.3229, "step": 1262 }, { "epoch": 1.0592675426334917, "grad_norm": 0.48894459441651406, "learning_rate": 8.172202245006126e-06, "loss": 0.359, "step": 1263 }, { "epoch": 1.0601062342745318, "grad_norm": 0.4155970360109209, "learning_rate": 8.168427641401207e-06, "loss": 0.3263, "step": 1264 }, { "epoch": 1.0609449259155717, "grad_norm": 0.3856365871674811, "learning_rate": 8.164650018043262e-06, "loss": 0.3343, "step": 1265 }, { "epoch": 1.0617836175566118, "grad_norm": 0.3941768106098403, "learning_rate": 8.160869378532656e-06, "loss": 0.3277, "step": 1266 }, { "epoch": 1.0626223091976517, "grad_norm": 0.3970397307750833, "learning_rate": 8.15708572647263e-06, "loss": 0.3159, "step": 1267 }, { "epoch": 1.0634610008386916, "grad_norm": 0.41236536316876776, "learning_rate": 8.153299065469287e-06, "loss": 0.3563, "step": 1268 }, { "epoch": 1.0642996924797317, "grad_norm": 0.418274226441487, "learning_rate": 8.149509399131606e-06, "loss": 0.3372, "step": 1269 }, { "epoch": 1.0651383841207716, "grad_norm": 0.36458520287111645, "learning_rate": 8.145716731071429e-06, "loss": 0.3221, "step": 1270 }, { "epoch": 1.0659770757618117, "grad_norm": 0.38364307003600034, "learning_rate": 8.141921064903456e-06, "loss": 0.3313, "step": 1271 }, { "epoch": 1.0668157674028516, "grad_norm": 0.4024196267077746, "learning_rate": 8.138122404245249e-06, "loss": 0.3472, "step": 1272 }, { "epoch": 1.0676544590438914, "grad_norm": 0.38270305406271476, "learning_rate": 8.134320752717217e-06, "loss": 0.3489, "step": 1273 }, { "epoch": 1.0684931506849316, "grad_norm": 0.38692912402428986, "learning_rate": 8.130516113942626e-06, "loss": 0.3267, "step": 1274 }, { "epoch": 1.0693318423259714, "grad_norm": 0.3827282660069844, "learning_rate": 8.126708491547588e-06, "loss": 0.3428, "step": 1275 }, { "epoch": 1.0701705339670116, "grad_norm": 0.39763911440746225, "learning_rate": 8.122897889161054e-06, "loss": 0.319, "step": 1276 }, { "epoch": 1.0710092256080515, "grad_norm": 0.41091408954836484, "learning_rate": 8.119084310414822e-06, "loss": 0.3417, "step": 1277 }, { "epoch": 1.0718479172490913, "grad_norm": 0.3630498395512095, "learning_rate": 8.11526775894352e-06, "loss": 0.3262, "step": 1278 }, { "epoch": 1.0726866088901315, "grad_norm": 0.40167987127087057, "learning_rate": 8.111448238384614e-06, "loss": 0.3398, "step": 1279 }, { "epoch": 1.0735253005311713, "grad_norm": 0.40527824336504015, "learning_rate": 8.107625752378398e-06, "loss": 0.3132, "step": 1280 }, { "epoch": 1.0743639921722115, "grad_norm": 0.4413801899071366, "learning_rate": 8.103800304567995e-06, "loss": 0.3671, "step": 1281 }, { "epoch": 1.0752026838132513, "grad_norm": 0.40049890528814514, "learning_rate": 8.099971898599343e-06, "loss": 0.3636, "step": 1282 }, { "epoch": 1.0760413754542912, "grad_norm": 0.35961144970542913, "learning_rate": 8.096140538121208e-06, "loss": 0.3055, "step": 1283 }, { "epoch": 1.0768800670953314, "grad_norm": 0.4025898541261542, "learning_rate": 8.092306226785169e-06, "loss": 0.3538, "step": 1284 }, { "epoch": 1.0777187587363712, "grad_norm": 0.4095348740821242, "learning_rate": 8.088468968245615e-06, "loss": 0.3654, "step": 1285 }, { "epoch": 1.0785574503774114, "grad_norm": 0.3671270162160849, "learning_rate": 8.08462876615975e-06, "loss": 0.3167, "step": 1286 }, { "epoch": 1.0793961420184512, "grad_norm": 0.3848983600932233, "learning_rate": 8.080785624187572e-06, "loss": 0.3076, "step": 1287 }, { "epoch": 1.0802348336594911, "grad_norm": 0.38091254817819026, "learning_rate": 8.076939545991895e-06, "loss": 0.3615, "step": 1288 }, { "epoch": 1.0810735253005312, "grad_norm": 0.3586652192992676, "learning_rate": 8.073090535238322e-06, "loss": 0.3346, "step": 1289 }, { "epoch": 1.0819122169415711, "grad_norm": 0.40065207488071425, "learning_rate": 8.069238595595252e-06, "loss": 0.3277, "step": 1290 }, { "epoch": 1.082750908582611, "grad_norm": 0.4237426244214497, "learning_rate": 8.06538373073388e-06, "loss": 0.3279, "step": 1291 }, { "epoch": 1.0835896002236511, "grad_norm": 0.491519776325195, "learning_rate": 8.061525944328184e-06, "loss": 0.363, "step": 1292 }, { "epoch": 1.084428291864691, "grad_norm": 0.3920790049494419, "learning_rate": 8.05766524005493e-06, "loss": 0.3185, "step": 1293 }, { "epoch": 1.0852669835057311, "grad_norm": 0.4363398166690538, "learning_rate": 8.053801621593661e-06, "loss": 0.3163, "step": 1294 }, { "epoch": 1.086105675146771, "grad_norm": 0.4408204441404118, "learning_rate": 8.049935092626703e-06, "loss": 0.3523, "step": 1295 }, { "epoch": 1.086944366787811, "grad_norm": 0.40265188760776827, "learning_rate": 8.046065656839151e-06, "loss": 0.3401, "step": 1296 }, { "epoch": 1.087783058428851, "grad_norm": 0.40661025359040054, "learning_rate": 8.042193317918872e-06, "loss": 0.3318, "step": 1297 }, { "epoch": 1.088621750069891, "grad_norm": 0.3972776989849043, "learning_rate": 8.0383180795565e-06, "loss": 0.3259, "step": 1298 }, { "epoch": 1.089460441710931, "grad_norm": 0.4186918145116525, "learning_rate": 8.034439945445435e-06, "loss": 0.3408, "step": 1299 }, { "epoch": 1.090299133351971, "grad_norm": 0.4424238672879728, "learning_rate": 8.030558919281831e-06, "loss": 0.3449, "step": 1300 }, { "epoch": 1.0911378249930108, "grad_norm": 0.39327922917482383, "learning_rate": 8.026675004764603e-06, "loss": 0.3039, "step": 1301 }, { "epoch": 1.091976516634051, "grad_norm": 0.41185698255898123, "learning_rate": 8.02278820559542e-06, "loss": 0.3564, "step": 1302 }, { "epoch": 1.0928152082750908, "grad_norm": 0.388418292547771, "learning_rate": 8.018898525478693e-06, "loss": 0.3112, "step": 1303 }, { "epoch": 1.093653899916131, "grad_norm": 0.3796338824404686, "learning_rate": 8.015005968121586e-06, "loss": 0.3487, "step": 1304 }, { "epoch": 1.0944925915571708, "grad_norm": 0.3938968421696437, "learning_rate": 8.011110537234004e-06, "loss": 0.3263, "step": 1305 }, { "epoch": 1.0953312831982107, "grad_norm": 0.3536255887759828, "learning_rate": 8.007212236528588e-06, "loss": 0.3358, "step": 1306 }, { "epoch": 1.0961699748392508, "grad_norm": 0.4618649435737609, "learning_rate": 8.003311069720717e-06, "loss": 0.387, "step": 1307 }, { "epoch": 1.0970086664802907, "grad_norm": 0.34725541405620824, "learning_rate": 7.9994070405285e-06, "loss": 0.3352, "step": 1308 }, { "epoch": 1.0978473581213306, "grad_norm": 0.41795654823274, "learning_rate": 7.995500152672772e-06, "loss": 0.3801, "step": 1309 }, { "epoch": 1.0986860497623707, "grad_norm": 0.36413946511111406, "learning_rate": 7.9915904098771e-06, "loss": 0.3181, "step": 1310 }, { "epoch": 1.0995247414034106, "grad_norm": 0.3651639328341943, "learning_rate": 7.987677815867761e-06, "loss": 0.3024, "step": 1311 }, { "epoch": 1.1003634330444507, "grad_norm": 0.45320964910075934, "learning_rate": 7.983762374373757e-06, "loss": 0.327, "step": 1312 }, { "epoch": 1.1012021246854906, "grad_norm": 0.40986567595087603, "learning_rate": 7.979844089126805e-06, "loss": 0.3414, "step": 1313 }, { "epoch": 1.1020408163265305, "grad_norm": 0.3811095621369927, "learning_rate": 7.975922963861326e-06, "loss": 0.3402, "step": 1314 }, { "epoch": 1.1028795079675706, "grad_norm": 0.4142817341415106, "learning_rate": 7.971999002314455e-06, "loss": 0.3168, "step": 1315 }, { "epoch": 1.1037181996086105, "grad_norm": 0.44045126398107326, "learning_rate": 7.968072208226024e-06, "loss": 0.3475, "step": 1316 }, { "epoch": 1.1045568912496506, "grad_norm": 0.41302938359316027, "learning_rate": 7.964142585338566e-06, "loss": 0.3473, "step": 1317 }, { "epoch": 1.1053955828906905, "grad_norm": 0.42133203196923075, "learning_rate": 7.96021013739731e-06, "loss": 0.3038, "step": 1318 }, { "epoch": 1.1062342745317304, "grad_norm": 0.4070600051048927, "learning_rate": 7.956274868150184e-06, "loss": 0.3435, "step": 1319 }, { "epoch": 1.1070729661727705, "grad_norm": 0.4080264654661738, "learning_rate": 7.952336781347797e-06, "loss": 0.332, "step": 1320 }, { "epoch": 1.1079116578138104, "grad_norm": 0.46890355563768904, "learning_rate": 7.948395880743445e-06, "loss": 0.3644, "step": 1321 }, { "epoch": 1.1087503494548505, "grad_norm": 0.452399685087341, "learning_rate": 7.944452170093105e-06, "loss": 0.3368, "step": 1322 }, { "epoch": 1.1095890410958904, "grad_norm": 0.4036911430339861, "learning_rate": 7.940505653155437e-06, "loss": 0.3368, "step": 1323 }, { "epoch": 1.1104277327369303, "grad_norm": 0.44630150605913, "learning_rate": 7.936556333691771e-06, "loss": 0.3324, "step": 1324 }, { "epoch": 1.1112664243779704, "grad_norm": 0.42562283852734656, "learning_rate": 7.93260421546611e-06, "loss": 0.332, "step": 1325 }, { "epoch": 1.1121051160190103, "grad_norm": 0.4484263931775092, "learning_rate": 7.928649302245125e-06, "loss": 0.3519, "step": 1326 }, { "epoch": 1.1129438076600504, "grad_norm": 0.4205874358995413, "learning_rate": 7.924691597798145e-06, "loss": 0.3568, "step": 1327 }, { "epoch": 1.1137824993010903, "grad_norm": 0.4243276751516663, "learning_rate": 7.920731105897169e-06, "loss": 0.3648, "step": 1328 }, { "epoch": 1.1146211909421302, "grad_norm": 0.469649794069947, "learning_rate": 7.916767830316847e-06, "loss": 0.3401, "step": 1329 }, { "epoch": 1.1154598825831703, "grad_norm": 0.4157499671798857, "learning_rate": 7.91280177483448e-06, "loss": 0.3196, "step": 1330 }, { "epoch": 1.1162985742242102, "grad_norm": 0.4408801851858546, "learning_rate": 7.908832943230022e-06, "loss": 0.3558, "step": 1331 }, { "epoch": 1.1171372658652503, "grad_norm": 0.43720077599988516, "learning_rate": 7.904861339286073e-06, "loss": 0.3477, "step": 1332 }, { "epoch": 1.1179759575062902, "grad_norm": 0.37809804739306313, "learning_rate": 7.900886966787876e-06, "loss": 0.3103, "step": 1333 }, { "epoch": 1.11881464914733, "grad_norm": 0.41573386673765905, "learning_rate": 7.896909829523308e-06, "loss": 0.3267, "step": 1334 }, { "epoch": 1.1196533407883702, "grad_norm": 0.42118576253571366, "learning_rate": 7.892929931282884e-06, "loss": 0.3057, "step": 1335 }, { "epoch": 1.12049203242941, "grad_norm": 0.4773626415036113, "learning_rate": 7.88894727585975e-06, "loss": 0.368, "step": 1336 }, { "epoch": 1.1213307240704502, "grad_norm": 0.408122554080642, "learning_rate": 7.884961867049682e-06, "loss": 0.3502, "step": 1337 }, { "epoch": 1.12216941571149, "grad_norm": 0.42075306985876587, "learning_rate": 7.880973708651078e-06, "loss": 0.2936, "step": 1338 }, { "epoch": 1.12300810735253, "grad_norm": 0.43719023294438464, "learning_rate": 7.87698280446496e-06, "loss": 0.3537, "step": 1339 }, { "epoch": 1.12384679899357, "grad_norm": 0.40684929101223116, "learning_rate": 7.872989158294954e-06, "loss": 0.3367, "step": 1340 }, { "epoch": 1.12468549063461, "grad_norm": 0.470900651738017, "learning_rate": 7.86899277394732e-06, "loss": 0.3642, "step": 1341 }, { "epoch": 1.12552418227565, "grad_norm": 0.41412391416127137, "learning_rate": 7.864993655230911e-06, "loss": 0.3114, "step": 1342 }, { "epoch": 1.12636287391669, "grad_norm": 0.43891629051293496, "learning_rate": 7.860991805957194e-06, "loss": 0.3397, "step": 1343 }, { "epoch": 1.1272015655577299, "grad_norm": 0.433283961834488, "learning_rate": 7.856987229940233e-06, "loss": 0.3532, "step": 1344 }, { "epoch": 1.12804025719877, "grad_norm": 0.37570762037013106, "learning_rate": 7.852979930996701e-06, "loss": 0.3039, "step": 1345 }, { "epoch": 1.1288789488398099, "grad_norm": 0.39935089819844344, "learning_rate": 7.848969912945854e-06, "loss": 0.3099, "step": 1346 }, { "epoch": 1.12971764048085, "grad_norm": 0.49028900343873105, "learning_rate": 7.844957179609544e-06, "loss": 0.3456, "step": 1347 }, { "epoch": 1.1305563321218899, "grad_norm": 0.41711788179950043, "learning_rate": 7.840941734812215e-06, "loss": 0.3339, "step": 1348 }, { "epoch": 1.1313950237629298, "grad_norm": 0.4379766783434579, "learning_rate": 7.836923582380892e-06, "loss": 0.3527, "step": 1349 }, { "epoch": 1.1322337154039699, "grad_norm": 0.448030817978755, "learning_rate": 7.832902726145177e-06, "loss": 0.3313, "step": 1350 }, { "epoch": 1.1330724070450098, "grad_norm": 0.40550533416368173, "learning_rate": 7.828879169937255e-06, "loss": 0.3617, "step": 1351 }, { "epoch": 1.1339110986860497, "grad_norm": 0.38238026315604545, "learning_rate": 7.824852917591883e-06, "loss": 0.3163, "step": 1352 }, { "epoch": 1.1347497903270898, "grad_norm": 0.4606080083073923, "learning_rate": 7.820823972946383e-06, "loss": 0.3586, "step": 1353 }, { "epoch": 1.1355884819681297, "grad_norm": 0.3859093226909096, "learning_rate": 7.816792339840648e-06, "loss": 0.3201, "step": 1354 }, { "epoch": 1.1364271736091698, "grad_norm": 0.4391732931891475, "learning_rate": 7.812758022117132e-06, "loss": 0.3469, "step": 1355 }, { "epoch": 1.1372658652502097, "grad_norm": 0.3888263522863471, "learning_rate": 7.808721023620846e-06, "loss": 0.3461, "step": 1356 }, { "epoch": 1.1381045568912496, "grad_norm": 0.3941696775643732, "learning_rate": 7.804681348199359e-06, "loss": 0.3654, "step": 1357 }, { "epoch": 1.1389432485322897, "grad_norm": 0.39954909423117685, "learning_rate": 7.80063899970279e-06, "loss": 0.3058, "step": 1358 }, { "epoch": 1.1397819401733296, "grad_norm": 0.38147347609588317, "learning_rate": 7.796593981983802e-06, "loss": 0.3164, "step": 1359 }, { "epoch": 1.1406206318143697, "grad_norm": 0.4603032331671978, "learning_rate": 7.792546298897608e-06, "loss": 0.3765, "step": 1360 }, { "epoch": 1.1414593234554096, "grad_norm": 0.41190885659267856, "learning_rate": 7.788495954301955e-06, "loss": 0.3303, "step": 1361 }, { "epoch": 1.1422980150964495, "grad_norm": 0.3437482453863542, "learning_rate": 7.784442952057134e-06, "loss": 0.2917, "step": 1362 }, { "epoch": 1.1431367067374896, "grad_norm": 0.4221382398597458, "learning_rate": 7.780387296025966e-06, "loss": 0.3477, "step": 1363 }, { "epoch": 1.1439753983785295, "grad_norm": 0.4253070381948561, "learning_rate": 7.776328990073795e-06, "loss": 0.314, "step": 1364 }, { "epoch": 1.1448140900195694, "grad_norm": 0.44766815440809543, "learning_rate": 7.7722680380685e-06, "loss": 0.3376, "step": 1365 }, { "epoch": 1.1456527816606095, "grad_norm": 0.4438424596466577, "learning_rate": 7.768204443880476e-06, "loss": 0.3282, "step": 1366 }, { "epoch": 1.1464914733016494, "grad_norm": 0.48326705355786514, "learning_rate": 7.76413821138264e-06, "loss": 0.3595, "step": 1367 }, { "epoch": 1.1473301649426895, "grad_norm": 0.43142108748330216, "learning_rate": 7.76006934445042e-06, "loss": 0.3102, "step": 1368 }, { "epoch": 1.1481688565837294, "grad_norm": 0.45789022624858833, "learning_rate": 7.755997846961754e-06, "loss": 0.3908, "step": 1369 }, { "epoch": 1.1490075482247692, "grad_norm": 0.4342792460476471, "learning_rate": 7.751923722797094e-06, "loss": 0.3242, "step": 1370 }, { "epoch": 1.1498462398658094, "grad_norm": 0.4445837292368704, "learning_rate": 7.747846975839389e-06, "loss": 0.3485, "step": 1371 }, { "epoch": 1.1506849315068493, "grad_norm": 0.397036002480759, "learning_rate": 7.743767609974087e-06, "loss": 0.3451, "step": 1372 }, { "epoch": 1.1515236231478894, "grad_norm": 0.39660896481545926, "learning_rate": 7.739685629089138e-06, "loss": 0.3013, "step": 1373 }, { "epoch": 1.1523623147889293, "grad_norm": 0.38304370296459, "learning_rate": 7.735601037074978e-06, "loss": 0.314, "step": 1374 }, { "epoch": 1.1532010064299691, "grad_norm": 0.4866222177328298, "learning_rate": 7.731513837824538e-06, "loss": 0.3762, "step": 1375 }, { "epoch": 1.1540396980710093, "grad_norm": 0.4638513769077512, "learning_rate": 7.727424035233229e-06, "loss": 0.3648, "step": 1376 }, { "epoch": 1.1548783897120491, "grad_norm": 0.4029721802903662, "learning_rate": 7.723331633198944e-06, "loss": 0.3399, "step": 1377 }, { "epoch": 1.1557170813530893, "grad_norm": 0.42781090339679495, "learning_rate": 7.719236635622052e-06, "loss": 0.3284, "step": 1378 }, { "epoch": 1.1565557729941291, "grad_norm": 0.37516154824513476, "learning_rate": 7.7151390464054e-06, "loss": 0.3281, "step": 1379 }, { "epoch": 1.157394464635169, "grad_norm": 0.44698696100224744, "learning_rate": 7.711038869454304e-06, "loss": 0.3564, "step": 1380 }, { "epoch": 1.1582331562762092, "grad_norm": 0.4342313015472691, "learning_rate": 7.706936108676544e-06, "loss": 0.3171, "step": 1381 }, { "epoch": 1.159071847917249, "grad_norm": 0.4273491228031657, "learning_rate": 7.702830767982363e-06, "loss": 0.3228, "step": 1382 }, { "epoch": 1.1599105395582892, "grad_norm": 0.40665439384056057, "learning_rate": 7.698722851284463e-06, "loss": 0.3051, "step": 1383 }, { "epoch": 1.160749231199329, "grad_norm": 0.4526029209989181, "learning_rate": 7.694612362498006e-06, "loss": 0.3723, "step": 1384 }, { "epoch": 1.161587922840369, "grad_norm": 0.38191600277718196, "learning_rate": 7.690499305540596e-06, "loss": 0.3425, "step": 1385 }, { "epoch": 1.162426614481409, "grad_norm": 0.4467671416630147, "learning_rate": 7.686383684332292e-06, "loss": 0.3153, "step": 1386 }, { "epoch": 1.163265306122449, "grad_norm": 0.47661324847337927, "learning_rate": 7.682265502795593e-06, "loss": 0.3248, "step": 1387 }, { "epoch": 1.164103997763489, "grad_norm": 0.44871661673069835, "learning_rate": 7.678144764855443e-06, "loss": 0.3416, "step": 1388 }, { "epoch": 1.164942689404529, "grad_norm": 0.42441986661908243, "learning_rate": 7.674021474439217e-06, "loss": 0.3579, "step": 1389 }, { "epoch": 1.1657813810455688, "grad_norm": 0.4093063985091075, "learning_rate": 7.669895635476725e-06, "loss": 0.3308, "step": 1390 }, { "epoch": 1.166620072686609, "grad_norm": 0.5766712620141627, "learning_rate": 7.665767251900207e-06, "loss": 0.3526, "step": 1391 }, { "epoch": 1.1674587643276488, "grad_norm": 0.4284276075247795, "learning_rate": 7.661636327644328e-06, "loss": 0.3266, "step": 1392 }, { "epoch": 1.168297455968689, "grad_norm": 0.4751143075662387, "learning_rate": 7.657502866646171e-06, "loss": 0.3347, "step": 1393 }, { "epoch": 1.1691361476097288, "grad_norm": 0.4958775994716418, "learning_rate": 7.65336687284524e-06, "loss": 0.3659, "step": 1394 }, { "epoch": 1.1699748392507687, "grad_norm": 0.49241564422211126, "learning_rate": 7.649228350183456e-06, "loss": 0.3902, "step": 1395 }, { "epoch": 1.1708135308918088, "grad_norm": 0.38828209003184394, "learning_rate": 7.64508730260514e-06, "loss": 0.293, "step": 1396 }, { "epoch": 1.1716522225328487, "grad_norm": 0.4551460286786463, "learning_rate": 7.640943734057032e-06, "loss": 0.3157, "step": 1397 }, { "epoch": 1.1724909141738888, "grad_norm": 0.4581165564086359, "learning_rate": 7.636797648488265e-06, "loss": 0.3717, "step": 1398 }, { "epoch": 1.1733296058149287, "grad_norm": 0.3777267344871635, "learning_rate": 7.632649049850377e-06, "loss": 0.3057, "step": 1399 }, { "epoch": 1.1741682974559686, "grad_norm": 0.45362979461758146, "learning_rate": 7.628497942097296e-06, "loss": 0.3718, "step": 1400 }, { "epoch": 1.1750069890970087, "grad_norm": 0.41317621137604865, "learning_rate": 7.624344329185347e-06, "loss": 0.3168, "step": 1401 }, { "epoch": 1.1758456807380486, "grad_norm": 0.36909202680874714, "learning_rate": 7.620188215073237e-06, "loss": 0.3323, "step": 1402 }, { "epoch": 1.1766843723790887, "grad_norm": 0.38055877775687197, "learning_rate": 7.61602960372206e-06, "loss": 0.3367, "step": 1403 }, { "epoch": 1.1775230640201286, "grad_norm": 0.3784120326150363, "learning_rate": 7.611868499095292e-06, "loss": 0.367, "step": 1404 }, { "epoch": 1.1783617556611685, "grad_norm": 0.38378217543637366, "learning_rate": 7.607704905158779e-06, "loss": 0.3509, "step": 1405 }, { "epoch": 1.1792004473022086, "grad_norm": 0.33824121508923594, "learning_rate": 7.603538825880747e-06, "loss": 0.2866, "step": 1406 }, { "epoch": 1.1800391389432485, "grad_norm": 0.3867489832540862, "learning_rate": 7.599370265231784e-06, "loss": 0.3461, "step": 1407 }, { "epoch": 1.1808778305842886, "grad_norm": 0.3977245993231145, "learning_rate": 7.595199227184851e-06, "loss": 0.3474, "step": 1408 }, { "epoch": 1.1817165222253285, "grad_norm": 0.3634033065652099, "learning_rate": 7.5910257157152575e-06, "loss": 0.3373, "step": 1409 }, { "epoch": 1.1825552138663684, "grad_norm": 0.36861978911736926, "learning_rate": 7.586849734800684e-06, "loss": 0.3222, "step": 1410 }, { "epoch": 1.1833939055074085, "grad_norm": 0.37395307268431915, "learning_rate": 7.582671288421157e-06, "loss": 0.357, "step": 1411 }, { "epoch": 1.1842325971484484, "grad_norm": 0.3968695581767433, "learning_rate": 7.578490380559053e-06, "loss": 0.3602, "step": 1412 }, { "epoch": 1.1850712887894883, "grad_norm": 0.38570015332141067, "learning_rate": 7.574307015199099e-06, "loss": 0.3247, "step": 1413 }, { "epoch": 1.1859099804305284, "grad_norm": 0.40389897130389, "learning_rate": 7.570121196328357e-06, "loss": 0.3356, "step": 1414 }, { "epoch": 1.1867486720715683, "grad_norm": 0.37717751800666416, "learning_rate": 7.5659329279362335e-06, "loss": 0.3571, "step": 1415 }, { "epoch": 1.1875873637126084, "grad_norm": 0.37952835623741865, "learning_rate": 7.561742214014469e-06, "loss": 0.3256, "step": 1416 }, { "epoch": 1.1884260553536483, "grad_norm": 0.41402368641671405, "learning_rate": 7.55754905855713e-06, "loss": 0.3515, "step": 1417 }, { "epoch": 1.1892647469946882, "grad_norm": 0.3428010654086189, "learning_rate": 7.553353465560615e-06, "loss": 0.3338, "step": 1418 }, { "epoch": 1.1901034386357283, "grad_norm": 0.4135974176590593, "learning_rate": 7.549155439023645e-06, "loss": 0.3294, "step": 1419 }, { "epoch": 1.1909421302767682, "grad_norm": 0.3909965420383449, "learning_rate": 7.5449549829472566e-06, "loss": 0.3265, "step": 1420 }, { "epoch": 1.191780821917808, "grad_norm": 0.3897462248664626, "learning_rate": 7.540752101334807e-06, "loss": 0.3511, "step": 1421 }, { "epoch": 1.1926195135588482, "grad_norm": 0.3795063572466375, "learning_rate": 7.536546798191959e-06, "loss": 0.3223, "step": 1422 }, { "epoch": 1.193458205199888, "grad_norm": 0.41750890928977175, "learning_rate": 7.532339077526694e-06, "loss": 0.3461, "step": 1423 }, { "epoch": 1.1942968968409282, "grad_norm": 0.40672416778552345, "learning_rate": 7.528128943349283e-06, "loss": 0.3371, "step": 1424 }, { "epoch": 1.195135588481968, "grad_norm": 0.3886545744608867, "learning_rate": 7.5239163996723095e-06, "loss": 0.3207, "step": 1425 }, { "epoch": 1.195974280123008, "grad_norm": 0.4382678180931479, "learning_rate": 7.519701450510646e-06, "loss": 0.3548, "step": 1426 }, { "epoch": 1.196812971764048, "grad_norm": 0.36260184500792536, "learning_rate": 7.51548409988146e-06, "loss": 0.3052, "step": 1427 }, { "epoch": 1.197651663405088, "grad_norm": 0.4191808689290853, "learning_rate": 7.511264351804212e-06, "loss": 0.3506, "step": 1428 }, { "epoch": 1.198490355046128, "grad_norm": 0.424637026895856, "learning_rate": 7.5070422103006415e-06, "loss": 0.3577, "step": 1429 }, { "epoch": 1.199329046687168, "grad_norm": 0.4099134147721274, "learning_rate": 7.502817679394768e-06, "loss": 0.3256, "step": 1430 }, { "epoch": 1.200167738328208, "grad_norm": 0.4179542689303479, "learning_rate": 7.498590763112896e-06, "loss": 0.3469, "step": 1431 }, { "epoch": 1.201006429969248, "grad_norm": 0.4102605151548099, "learning_rate": 7.494361465483596e-06, "loss": 0.3592, "step": 1432 }, { "epoch": 1.201845121610288, "grad_norm": 0.43186973651218674, "learning_rate": 7.490129790537712e-06, "loss": 0.3239, "step": 1433 }, { "epoch": 1.202683813251328, "grad_norm": 0.4131794201760392, "learning_rate": 7.485895742308354e-06, "loss": 0.3399, "step": 1434 }, { "epoch": 1.203522504892368, "grad_norm": 0.3828458982840703, "learning_rate": 7.481659324830889e-06, "loss": 0.3607, "step": 1435 }, { "epoch": 1.2043611965334078, "grad_norm": 0.3823321904219884, "learning_rate": 7.4774205421429495e-06, "loss": 0.3538, "step": 1436 }, { "epoch": 1.205199888174448, "grad_norm": 0.40393314926900153, "learning_rate": 7.473179398284417e-06, "loss": 0.3338, "step": 1437 }, { "epoch": 1.2060385798154878, "grad_norm": 0.38673005320339565, "learning_rate": 7.468935897297424e-06, "loss": 0.3421, "step": 1438 }, { "epoch": 1.206877271456528, "grad_norm": 0.37719505218225824, "learning_rate": 7.46469004322635e-06, "loss": 0.3712, "step": 1439 }, { "epoch": 1.2077159630975678, "grad_norm": 0.38812903174508184, "learning_rate": 7.460441840117821e-06, "loss": 0.3449, "step": 1440 }, { "epoch": 1.2085546547386077, "grad_norm": 0.42580456786703286, "learning_rate": 7.456191292020696e-06, "loss": 0.3323, "step": 1441 }, { "epoch": 1.2093933463796478, "grad_norm": 0.4125169495915466, "learning_rate": 7.451938402986071e-06, "loss": 0.3398, "step": 1442 }, { "epoch": 1.2102320380206877, "grad_norm": 0.3627423235533845, "learning_rate": 7.447683177067273e-06, "loss": 0.366, "step": 1443 }, { "epoch": 1.2110707296617278, "grad_norm": 0.3811118093393032, "learning_rate": 7.4434256183198576e-06, "loss": 0.3109, "step": 1444 }, { "epoch": 1.2119094213027677, "grad_norm": 0.5043911128652128, "learning_rate": 7.439165730801602e-06, "loss": 0.3797, "step": 1445 }, { "epoch": 1.2127481129438076, "grad_norm": 0.3858550187448878, "learning_rate": 7.4349035185725045e-06, "loss": 0.3225, "step": 1446 }, { "epoch": 1.2135868045848477, "grad_norm": 0.3842880151430232, "learning_rate": 7.4306389856947795e-06, "loss": 0.3253, "step": 1447 }, { "epoch": 1.2144254962258876, "grad_norm": 0.4619576424478044, "learning_rate": 7.426372136232848e-06, "loss": 0.367, "step": 1448 }, { "epoch": 1.2152641878669277, "grad_norm": 0.3819986478681608, "learning_rate": 7.4221029742533455e-06, "loss": 0.3556, "step": 1449 }, { "epoch": 1.2161028795079676, "grad_norm": 0.3364128871812495, "learning_rate": 7.417831503825109e-06, "loss": 0.3227, "step": 1450 }, { "epoch": 1.2169415711490075, "grad_norm": 0.3970526935257009, "learning_rate": 7.413557729019174e-06, "loss": 0.3302, "step": 1451 }, { "epoch": 1.2177802627900476, "grad_norm": 0.4051113038263828, "learning_rate": 7.409281653908773e-06, "loss": 0.3387, "step": 1452 }, { "epoch": 1.2186189544310875, "grad_norm": 0.3924846936542695, "learning_rate": 7.405003282569335e-06, "loss": 0.3261, "step": 1453 }, { "epoch": 1.2194576460721276, "grad_norm": 0.3768813039561812, "learning_rate": 7.400722619078471e-06, "loss": 0.2963, "step": 1454 }, { "epoch": 1.2202963377131675, "grad_norm": 0.40516807133212623, "learning_rate": 7.39643966751598e-06, "loss": 0.3299, "step": 1455 }, { "epoch": 1.2211350293542074, "grad_norm": 0.3902783622201966, "learning_rate": 7.39215443196384e-06, "loss": 0.3205, "step": 1456 }, { "epoch": 1.2219737209952475, "grad_norm": 0.3787352374754649, "learning_rate": 7.38786691650621e-06, "loss": 0.3555, "step": 1457 }, { "epoch": 1.2228124126362874, "grad_norm": 0.39546133704560293, "learning_rate": 7.383577125229418e-06, "loss": 0.3139, "step": 1458 }, { "epoch": 1.2236511042773275, "grad_norm": 0.4251468911708015, "learning_rate": 7.37928506222196e-06, "loss": 0.3573, "step": 1459 }, { "epoch": 1.2244897959183674, "grad_norm": 0.4020354442762468, "learning_rate": 7.374990731574503e-06, "loss": 0.3347, "step": 1460 }, { "epoch": 1.2253284875594073, "grad_norm": 0.3717965697376482, "learning_rate": 7.370694137379867e-06, "loss": 0.3294, "step": 1461 }, { "epoch": 1.2261671792004474, "grad_norm": 0.3756984558174627, "learning_rate": 7.366395283733037e-06, "loss": 0.328, "step": 1462 }, { "epoch": 1.2270058708414873, "grad_norm": 0.391437197812464, "learning_rate": 7.362094174731146e-06, "loss": 0.3579, "step": 1463 }, { "epoch": 1.2278445624825274, "grad_norm": 0.3607077123274922, "learning_rate": 7.35779081447348e-06, "loss": 0.3075, "step": 1464 }, { "epoch": 1.2286832541235673, "grad_norm": 0.40888028942797927, "learning_rate": 7.353485207061469e-06, "loss": 0.3661, "step": 1465 }, { "epoch": 1.2295219457646072, "grad_norm": 0.3686826011856283, "learning_rate": 7.349177356598686e-06, "loss": 0.3194, "step": 1466 }, { "epoch": 1.2303606374056473, "grad_norm": 0.36309021064391994, "learning_rate": 7.34486726719084e-06, "loss": 0.3306, "step": 1467 }, { "epoch": 1.2311993290466872, "grad_norm": 0.397869160051169, "learning_rate": 7.340554942945773e-06, "loss": 0.3593, "step": 1468 }, { "epoch": 1.232038020687727, "grad_norm": 0.4209596612275355, "learning_rate": 7.336240387973461e-06, "loss": 0.3167, "step": 1469 }, { "epoch": 1.2328767123287672, "grad_norm": 0.38747417070459483, "learning_rate": 7.331923606386003e-06, "loss": 0.3336, "step": 1470 }, { "epoch": 1.233715403969807, "grad_norm": 0.35414233737097106, "learning_rate": 7.32760460229762e-06, "loss": 0.3218, "step": 1471 }, { "epoch": 1.2345540956108472, "grad_norm": 0.3688605125547671, "learning_rate": 7.323283379824654e-06, "loss": 0.3318, "step": 1472 }, { "epoch": 1.235392787251887, "grad_norm": 0.37760590267755323, "learning_rate": 7.318959943085559e-06, "loss": 0.2997, "step": 1473 }, { "epoch": 1.236231478892927, "grad_norm": 0.41611623193917735, "learning_rate": 7.314634296200897e-06, "loss": 0.3497, "step": 1474 }, { "epoch": 1.237070170533967, "grad_norm": 0.3836999580049116, "learning_rate": 7.310306443293341e-06, "loss": 0.3631, "step": 1475 }, { "epoch": 1.237908862175007, "grad_norm": 0.41660647944602985, "learning_rate": 7.305976388487665e-06, "loss": 0.3542, "step": 1476 }, { "epoch": 1.2387475538160468, "grad_norm": 0.46540436299691523, "learning_rate": 7.301644135910743e-06, "loss": 0.2978, "step": 1477 }, { "epoch": 1.239586245457087, "grad_norm": 0.3921657051391014, "learning_rate": 7.2973096896915375e-06, "loss": 0.3402, "step": 1478 }, { "epoch": 1.2404249370981268, "grad_norm": 0.3667680941247037, "learning_rate": 7.2929730539611095e-06, "loss": 0.3421, "step": 1479 }, { "epoch": 1.241263628739167, "grad_norm": 0.39622295118307693, "learning_rate": 7.288634232852603e-06, "loss": 0.34, "step": 1480 }, { "epoch": 1.2421023203802068, "grad_norm": 0.3993567532538991, "learning_rate": 7.284293230501244e-06, "loss": 0.3452, "step": 1481 }, { "epoch": 1.2429410120212467, "grad_norm": 0.3947760958466651, "learning_rate": 7.279950051044338e-06, "loss": 0.3157, "step": 1482 }, { "epoch": 1.2437797036622869, "grad_norm": 0.41969943427848494, "learning_rate": 7.275604698621268e-06, "loss": 0.3351, "step": 1483 }, { "epoch": 1.2446183953033267, "grad_norm": 0.3954063843452236, "learning_rate": 7.271257177373487e-06, "loss": 0.3392, "step": 1484 }, { "epoch": 1.2454570869443669, "grad_norm": 0.41456391147932203, "learning_rate": 7.26690749144451e-06, "loss": 0.354, "step": 1485 }, { "epoch": 1.2462957785854067, "grad_norm": 0.39444016843519286, "learning_rate": 7.2625556449799224e-06, "loss": 0.3054, "step": 1486 }, { "epoch": 1.2471344702264466, "grad_norm": 0.40161158435084693, "learning_rate": 7.258201642127365e-06, "loss": 0.3552, "step": 1487 }, { "epoch": 1.2479731618674867, "grad_norm": 0.3551411874763546, "learning_rate": 7.253845487036533e-06, "loss": 0.3267, "step": 1488 }, { "epoch": 1.2488118535085266, "grad_norm": 0.40371670066671594, "learning_rate": 7.2494871838591764e-06, "loss": 0.345, "step": 1489 }, { "epoch": 1.2496505451495667, "grad_norm": 0.3798800083474833, "learning_rate": 7.245126736749089e-06, "loss": 0.357, "step": 1490 }, { "epoch": 1.2504892367906066, "grad_norm": 0.37533529529186405, "learning_rate": 7.240764149862108e-06, "loss": 0.3459, "step": 1491 }, { "epoch": 1.2513279284316465, "grad_norm": 0.37685483185772484, "learning_rate": 7.236399427356114e-06, "loss": 0.3185, "step": 1492 }, { "epoch": 1.2521666200726866, "grad_norm": 0.346070163188884, "learning_rate": 7.232032573391019e-06, "loss": 0.3115, "step": 1493 }, { "epoch": 1.2530053117137265, "grad_norm": 0.3605545117546423, "learning_rate": 7.227663592128766e-06, "loss": 0.3345, "step": 1494 }, { "epoch": 1.2538440033547666, "grad_norm": 0.4234567522117501, "learning_rate": 7.223292487733328e-06, "loss": 0.3812, "step": 1495 }, { "epoch": 1.2546826949958065, "grad_norm": 0.3632138457496506, "learning_rate": 7.218919264370704e-06, "loss": 0.31, "step": 1496 }, { "epoch": 1.2555213866368464, "grad_norm": 0.3982046838228566, "learning_rate": 7.214543926208906e-06, "loss": 0.3243, "step": 1497 }, { "epoch": 1.2563600782778865, "grad_norm": 0.4744737361864385, "learning_rate": 7.210166477417964e-06, "loss": 0.3759, "step": 1498 }, { "epoch": 1.2571987699189264, "grad_norm": 0.394602673700182, "learning_rate": 7.20578692216992e-06, "loss": 0.3396, "step": 1499 }, { "epoch": 1.2580374615599665, "grad_norm": 0.40871183498058455, "learning_rate": 7.201405264638828e-06, "loss": 0.3173, "step": 1500 }, { "epoch": 1.2588761532010064, "grad_norm": 0.46150959740592296, "learning_rate": 7.197021509000737e-06, "loss": 0.3224, "step": 1501 }, { "epoch": 1.2597148448420463, "grad_norm": 0.36532497636531286, "learning_rate": 7.192635659433702e-06, "loss": 0.3251, "step": 1502 }, { "epoch": 1.2605535364830864, "grad_norm": 0.39640286461676455, "learning_rate": 7.1882477201177735e-06, "loss": 0.3232, "step": 1503 }, { "epoch": 1.2613922281241263, "grad_norm": 0.39841522460685, "learning_rate": 7.18385769523499e-06, "loss": 0.3226, "step": 1504 }, { "epoch": 1.2622309197651664, "grad_norm": 0.42123618306145566, "learning_rate": 7.179465588969381e-06, "loss": 0.3388, "step": 1505 }, { "epoch": 1.2630696114062063, "grad_norm": 0.3803327378170036, "learning_rate": 7.175071405506957e-06, "loss": 0.3154, "step": 1506 }, { "epoch": 1.2639083030472462, "grad_norm": 0.43965777278624785, "learning_rate": 7.170675149035711e-06, "loss": 0.3459, "step": 1507 }, { "epoch": 1.2647469946882863, "grad_norm": 0.3718486414766645, "learning_rate": 7.166276823745609e-06, "loss": 0.305, "step": 1508 }, { "epoch": 1.2655856863293262, "grad_norm": 0.3898306519871552, "learning_rate": 7.161876433828594e-06, "loss": 0.334, "step": 1509 }, { "epoch": 1.2664243779703663, "grad_norm": 0.3554862133566785, "learning_rate": 7.157473983478569e-06, "loss": 0.306, "step": 1510 }, { "epoch": 1.2672630696114062, "grad_norm": 0.38317069682038796, "learning_rate": 7.1530694768914064e-06, "loss": 0.3718, "step": 1511 }, { "epoch": 1.2681017612524461, "grad_norm": 0.39800919241928745, "learning_rate": 7.148662918264936e-06, "loss": 0.3551, "step": 1512 }, { "epoch": 1.2689404528934862, "grad_norm": 0.40347686420413004, "learning_rate": 7.144254311798944e-06, "loss": 0.3234, "step": 1513 }, { "epoch": 1.2697791445345261, "grad_norm": 0.4028981534350198, "learning_rate": 7.13984366169517e-06, "loss": 0.3485, "step": 1514 }, { "epoch": 1.2706178361755662, "grad_norm": 0.41860459125182564, "learning_rate": 7.135430972157297e-06, "loss": 0.3292, "step": 1515 }, { "epoch": 1.2714565278166061, "grad_norm": 0.3706124901918793, "learning_rate": 7.1310162473909575e-06, "loss": 0.3072, "step": 1516 }, { "epoch": 1.272295219457646, "grad_norm": 0.3623325767247628, "learning_rate": 7.126599491603716e-06, "loss": 0.3276, "step": 1517 }, { "epoch": 1.2731339110986861, "grad_norm": 0.37092881526702853, "learning_rate": 7.122180709005084e-06, "loss": 0.3503, "step": 1518 }, { "epoch": 1.273972602739726, "grad_norm": 0.3934365447178884, "learning_rate": 7.117759903806491e-06, "loss": 0.3351, "step": 1519 }, { "epoch": 1.2748112943807661, "grad_norm": 0.4237096612318686, "learning_rate": 7.1133370802213085e-06, "loss": 0.377, "step": 1520 }, { "epoch": 1.275649986021806, "grad_norm": 0.3413053928447647, "learning_rate": 7.108912242464819e-06, "loss": 0.3021, "step": 1521 }, { "epoch": 1.276488677662846, "grad_norm": 0.3886149230937306, "learning_rate": 7.104485394754232e-06, "loss": 0.3539, "step": 1522 }, { "epoch": 1.277327369303886, "grad_norm": 0.37265731228944915, "learning_rate": 7.1000565413086705e-06, "loss": 0.334, "step": 1523 }, { "epoch": 1.278166060944926, "grad_norm": 0.3888406918597801, "learning_rate": 7.09562568634917e-06, "loss": 0.3271, "step": 1524 }, { "epoch": 1.279004752585966, "grad_norm": 0.3500993841084979, "learning_rate": 7.091192834098673e-06, "loss": 0.3222, "step": 1525 }, { "epoch": 1.279843444227006, "grad_norm": 0.39625897513166203, "learning_rate": 7.0867579887820225e-06, "loss": 0.3534, "step": 1526 }, { "epoch": 1.2806821358680458, "grad_norm": 0.35980912150015154, "learning_rate": 7.082321154625969e-06, "loss": 0.3329, "step": 1527 }, { "epoch": 1.2815208275090857, "grad_norm": 0.4195071031262667, "learning_rate": 7.077882335859151e-06, "loss": 0.3338, "step": 1528 }, { "epoch": 1.2823595191501258, "grad_norm": 0.3415996225904976, "learning_rate": 7.073441536712101e-06, "loss": 0.3005, "step": 1529 }, { "epoch": 1.283198210791166, "grad_norm": 0.40086357053269633, "learning_rate": 7.068998761417239e-06, "loss": 0.3394, "step": 1530 }, { "epoch": 1.2840369024322058, "grad_norm": 0.392449037100752, "learning_rate": 7.0645540142088685e-06, "loss": 0.3366, "step": 1531 }, { "epoch": 1.2848755940732457, "grad_norm": 0.37537435145554554, "learning_rate": 7.0601072993231734e-06, "loss": 0.3263, "step": 1532 }, { "epoch": 1.2857142857142856, "grad_norm": 0.40712643294125295, "learning_rate": 7.055658620998212e-06, "loss": 0.3336, "step": 1533 }, { "epoch": 1.2865529773553257, "grad_norm": 0.3881763347597101, "learning_rate": 7.051207983473911e-06, "loss": 0.3131, "step": 1534 }, { "epoch": 1.2873916689963656, "grad_norm": 0.3503827323761734, "learning_rate": 7.046755390992071e-06, "loss": 0.329, "step": 1535 }, { "epoch": 1.2882303606374057, "grad_norm": 0.443025183063486, "learning_rate": 7.0423008477963496e-06, "loss": 0.3809, "step": 1536 }, { "epoch": 1.2890690522784456, "grad_norm": 0.4221153635068458, "learning_rate": 7.037844358132267e-06, "loss": 0.3587, "step": 1537 }, { "epoch": 1.2899077439194855, "grad_norm": 0.3805565892757407, "learning_rate": 7.033385926247195e-06, "loss": 0.3789, "step": 1538 }, { "epoch": 1.2907464355605256, "grad_norm": 0.410859141416148, "learning_rate": 7.0289255563903624e-06, "loss": 0.3131, "step": 1539 }, { "epoch": 1.2915851272015655, "grad_norm": 0.36956197475496566, "learning_rate": 7.024463252812841e-06, "loss": 0.3035, "step": 1540 }, { "epoch": 1.2924238188426056, "grad_norm": 0.40453691769661987, "learning_rate": 7.019999019767544e-06, "loss": 0.3247, "step": 1541 }, { "epoch": 1.2932625104836455, "grad_norm": 0.43350541887476307, "learning_rate": 7.015532861509227e-06, "loss": 0.3709, "step": 1542 }, { "epoch": 1.2941012021246854, "grad_norm": 0.38337917886061457, "learning_rate": 7.011064782294481e-06, "loss": 0.298, "step": 1543 }, { "epoch": 1.2949398937657255, "grad_norm": 0.4408231557089736, "learning_rate": 7.006594786381722e-06, "loss": 0.3622, "step": 1544 }, { "epoch": 1.2957785854067654, "grad_norm": 0.3666393723452815, "learning_rate": 7.002122878031201e-06, "loss": 0.3404, "step": 1545 }, { "epoch": 1.2966172770478055, "grad_norm": 0.37396773570656805, "learning_rate": 6.9976490615049855e-06, "loss": 0.3099, "step": 1546 }, { "epoch": 1.2974559686888454, "grad_norm": 0.39407965476838497, "learning_rate": 6.993173341066963e-06, "loss": 0.3301, "step": 1547 }, { "epoch": 1.2982946603298853, "grad_norm": 0.3646246940460644, "learning_rate": 6.988695720982838e-06, "loss": 0.3302, "step": 1548 }, { "epoch": 1.2991333519709254, "grad_norm": 0.3956581993906088, "learning_rate": 6.9842162055201225e-06, "loss": 0.3593, "step": 1549 }, { "epoch": 1.2999720436119653, "grad_norm": 0.3535904150563332, "learning_rate": 6.9797347989481345e-06, "loss": 0.3075, "step": 1550 }, { "epoch": 1.3008107352530054, "grad_norm": 0.4444219266624833, "learning_rate": 6.975251505537999e-06, "loss": 0.3862, "step": 1551 }, { "epoch": 1.3016494268940453, "grad_norm": 0.4126407767910258, "learning_rate": 6.970766329562635e-06, "loss": 0.3481, "step": 1552 }, { "epoch": 1.3024881185350852, "grad_norm": 0.38917357133207686, "learning_rate": 6.966279275296759e-06, "loss": 0.3401, "step": 1553 }, { "epoch": 1.3033268101761253, "grad_norm": 0.37692804027897375, "learning_rate": 6.961790347016873e-06, "loss": 0.334, "step": 1554 }, { "epoch": 1.3041655018171652, "grad_norm": 0.3825801476286627, "learning_rate": 6.9572995490012666e-06, "loss": 0.2964, "step": 1555 }, { "epoch": 1.3050041934582053, "grad_norm": 0.38330403895881543, "learning_rate": 6.952806885530015e-06, "loss": 0.3263, "step": 1556 }, { "epoch": 1.3058428850992452, "grad_norm": 0.38859114203734185, "learning_rate": 6.94831236088497e-06, "loss": 0.3457, "step": 1557 }, { "epoch": 1.306681576740285, "grad_norm": 0.4388122112656366, "learning_rate": 6.943815979349755e-06, "loss": 0.3801, "step": 1558 }, { "epoch": 1.3075202683813252, "grad_norm": 0.38903190766924356, "learning_rate": 6.939317745209763e-06, "loss": 0.2966, "step": 1559 }, { "epoch": 1.308358960022365, "grad_norm": 0.4258353600033022, "learning_rate": 6.934817662752153e-06, "loss": 0.3516, "step": 1560 }, { "epoch": 1.3091976516634052, "grad_norm": 0.41282690889315116, "learning_rate": 6.930315736265852e-06, "loss": 0.3328, "step": 1561 }, { "epoch": 1.310036343304445, "grad_norm": 0.4151300466182574, "learning_rate": 6.925811970041534e-06, "loss": 0.3531, "step": 1562 }, { "epoch": 1.310875034945485, "grad_norm": 0.3810982476899316, "learning_rate": 6.921306368371633e-06, "loss": 0.3251, "step": 1563 }, { "epoch": 1.311713726586525, "grad_norm": 0.3949638199026879, "learning_rate": 6.91679893555033e-06, "loss": 0.3551, "step": 1564 }, { "epoch": 1.312552418227565, "grad_norm": 0.3780256883320436, "learning_rate": 6.912289675873553e-06, "loss": 0.3306, "step": 1565 }, { "epoch": 1.313391109868605, "grad_norm": 0.4101945191504385, "learning_rate": 6.907778593638972e-06, "loss": 0.3531, "step": 1566 }, { "epoch": 1.314229801509645, "grad_norm": 0.41209139928706745, "learning_rate": 6.903265693145988e-06, "loss": 0.3269, "step": 1567 }, { "epoch": 1.3150684931506849, "grad_norm": 0.40955383982983806, "learning_rate": 6.898750978695742e-06, "loss": 0.318, "step": 1568 }, { "epoch": 1.315907184791725, "grad_norm": 0.3844928656774288, "learning_rate": 6.894234454591099e-06, "loss": 0.334, "step": 1569 }, { "epoch": 1.3167458764327649, "grad_norm": 0.4384252152531695, "learning_rate": 6.889716125136653e-06, "loss": 0.3674, "step": 1570 }, { "epoch": 1.317584568073805, "grad_norm": 0.43146586962261685, "learning_rate": 6.885195994638716e-06, "loss": 0.3269, "step": 1571 }, { "epoch": 1.3184232597148449, "grad_norm": 0.3904057950009554, "learning_rate": 6.8806740674053155e-06, "loss": 0.3449, "step": 1572 }, { "epoch": 1.3192619513558848, "grad_norm": 0.35361089363149195, "learning_rate": 6.876150347746192e-06, "loss": 0.3448, "step": 1573 }, { "epoch": 1.3201006429969249, "grad_norm": 0.4394955144799562, "learning_rate": 6.871624839972798e-06, "loss": 0.3519, "step": 1574 }, { "epoch": 1.3209393346379648, "grad_norm": 0.4568367483002617, "learning_rate": 6.867097548398285e-06, "loss": 0.3743, "step": 1575 }, { "epoch": 1.3217780262790049, "grad_norm": 0.3959702177620034, "learning_rate": 6.862568477337508e-06, "loss": 0.3156, "step": 1576 }, { "epoch": 1.3226167179200448, "grad_norm": 0.4318213244850707, "learning_rate": 6.858037631107016e-06, "loss": 0.3326, "step": 1577 }, { "epoch": 1.3234554095610847, "grad_norm": 0.4552379513479908, "learning_rate": 6.853505014025052e-06, "loss": 0.3632, "step": 1578 }, { "epoch": 1.3242941012021248, "grad_norm": 0.38543996138899683, "learning_rate": 6.848970630411546e-06, "loss": 0.3042, "step": 1579 }, { "epoch": 1.3251327928431647, "grad_norm": 0.37719160245184846, "learning_rate": 6.844434484588109e-06, "loss": 0.3388, "step": 1580 }, { "epoch": 1.3259714844842048, "grad_norm": 0.42073166928127487, "learning_rate": 6.839896580878034e-06, "loss": 0.3417, "step": 1581 }, { "epoch": 1.3268101761252447, "grad_norm": 0.38538400949063506, "learning_rate": 6.835356923606291e-06, "loss": 0.3324, "step": 1582 }, { "epoch": 1.3276488677662845, "grad_norm": 0.3776494561014276, "learning_rate": 6.830815517099518e-06, "loss": 0.3145, "step": 1583 }, { "epoch": 1.3284875594073244, "grad_norm": 0.38872789140216857, "learning_rate": 6.826272365686023e-06, "loss": 0.3283, "step": 1584 }, { "epoch": 1.3293262510483645, "grad_norm": 0.4205897594360861, "learning_rate": 6.821727473695774e-06, "loss": 0.3667, "step": 1585 }, { "epoch": 1.3301649426894047, "grad_norm": 0.34754892273659466, "learning_rate": 6.817180845460398e-06, "loss": 0.2936, "step": 1586 }, { "epoch": 1.3310036343304446, "grad_norm": 0.39466848453188036, "learning_rate": 6.81263248531318e-06, "loss": 0.3828, "step": 1587 }, { "epoch": 1.3318423259714844, "grad_norm": 0.3892994145982005, "learning_rate": 6.808082397589055e-06, "loss": 0.3175, "step": 1588 }, { "epoch": 1.3326810176125243, "grad_norm": 0.3899075646911034, "learning_rate": 6.803530586624603e-06, "loss": 0.3567, "step": 1589 }, { "epoch": 1.3335197092535644, "grad_norm": 0.4241549925227182, "learning_rate": 6.798977056758045e-06, "loss": 0.3618, "step": 1590 }, { "epoch": 1.3343584008946043, "grad_norm": 0.38393883366134496, "learning_rate": 6.794421812329244e-06, "loss": 0.3047, "step": 1591 }, { "epoch": 1.3351970925356444, "grad_norm": 0.3658349131964977, "learning_rate": 6.789864857679693e-06, "loss": 0.3121, "step": 1592 }, { "epoch": 1.3360357841766843, "grad_norm": 0.4330348760232603, "learning_rate": 6.785306197152518e-06, "loss": 0.3769, "step": 1593 }, { "epoch": 1.3368744758177242, "grad_norm": 0.3667636196319053, "learning_rate": 6.78074583509247e-06, "loss": 0.2948, "step": 1594 }, { "epoch": 1.3377131674587643, "grad_norm": 0.38278393949983525, "learning_rate": 6.776183775845924e-06, "loss": 0.3676, "step": 1595 }, { "epoch": 1.3385518590998042, "grad_norm": 0.37637707268238774, "learning_rate": 6.771620023760867e-06, "loss": 0.332, "step": 1596 }, { "epoch": 1.3393905507408443, "grad_norm": 0.39143037853241275, "learning_rate": 6.767054583186903e-06, "loss": 0.3233, "step": 1597 }, { "epoch": 1.3402292423818842, "grad_norm": 0.3841706619978441, "learning_rate": 6.7624874584752466e-06, "loss": 0.3407, "step": 1598 }, { "epoch": 1.3410679340229241, "grad_norm": 0.37501477025157, "learning_rate": 6.757918653978713e-06, "loss": 0.3292, "step": 1599 }, { "epoch": 1.3419066256639642, "grad_norm": 0.38730792825979166, "learning_rate": 6.753348174051721e-06, "loss": 0.3232, "step": 1600 }, { "epoch": 1.3427453173050041, "grad_norm": 0.41388052793629776, "learning_rate": 6.7487760230502905e-06, "loss": 0.3329, "step": 1601 }, { "epoch": 1.3435840089460442, "grad_norm": 0.3638648265217065, "learning_rate": 6.744202205332027e-06, "loss": 0.3305, "step": 1602 }, { "epoch": 1.3444227005870841, "grad_norm": 0.4199596344758675, "learning_rate": 6.739626725256127e-06, "loss": 0.3537, "step": 1603 }, { "epoch": 1.345261392228124, "grad_norm": 0.37944507590918003, "learning_rate": 6.735049587183372e-06, "loss": 0.3299, "step": 1604 }, { "epoch": 1.3461000838691641, "grad_norm": 0.3896074721392447, "learning_rate": 6.730470795476124e-06, "loss": 0.2976, "step": 1605 }, { "epoch": 1.346938775510204, "grad_norm": 0.43603682219177187, "learning_rate": 6.725890354498321e-06, "loss": 0.3551, "step": 1606 }, { "epoch": 1.3477774671512441, "grad_norm": 0.41000467019502784, "learning_rate": 6.7213082686154705e-06, "loss": 0.3207, "step": 1607 }, { "epoch": 1.348616158792284, "grad_norm": 0.37512618023165323, "learning_rate": 6.716724542194652e-06, "loss": 0.3282, "step": 1608 }, { "epoch": 1.349454850433324, "grad_norm": 0.4020658040146113, "learning_rate": 6.712139179604504e-06, "loss": 0.3874, "step": 1609 }, { "epoch": 1.350293542074364, "grad_norm": 0.4554166555834967, "learning_rate": 6.707552185215228e-06, "loss": 0.327, "step": 1610 }, { "epoch": 1.351132233715404, "grad_norm": 0.42003516748384573, "learning_rate": 6.702963563398578e-06, "loss": 0.358, "step": 1611 }, { "epoch": 1.351970925356444, "grad_norm": 0.3396792798459843, "learning_rate": 6.69837331852786e-06, "loss": 0.2893, "step": 1612 }, { "epoch": 1.352809616997484, "grad_norm": 0.40512121734622386, "learning_rate": 6.69378145497793e-06, "loss": 0.354, "step": 1613 }, { "epoch": 1.3536483086385238, "grad_norm": 0.385338438193797, "learning_rate": 6.6891879771251815e-06, "loss": 0.3618, "step": 1614 }, { "epoch": 1.354487000279564, "grad_norm": 0.37028850824943255, "learning_rate": 6.684592889347549e-06, "loss": 0.3502, "step": 1615 }, { "epoch": 1.3553256919206038, "grad_norm": 0.40583319647024824, "learning_rate": 6.679996196024501e-06, "loss": 0.3316, "step": 1616 }, { "epoch": 1.356164383561644, "grad_norm": 0.3540331998831698, "learning_rate": 6.67539790153704e-06, "loss": 0.3051, "step": 1617 }, { "epoch": 1.3570030752026838, "grad_norm": 0.3958559953232498, "learning_rate": 6.670798010267685e-06, "loss": 0.3635, "step": 1618 }, { "epoch": 1.3578417668437237, "grad_norm": 0.3766137011892899, "learning_rate": 6.666196526600489e-06, "loss": 0.3193, "step": 1619 }, { "epoch": 1.3586804584847638, "grad_norm": 0.3771218595861394, "learning_rate": 6.661593454921014e-06, "loss": 0.332, "step": 1620 }, { "epoch": 1.3595191501258037, "grad_norm": 0.39096207790508736, "learning_rate": 6.656988799616337e-06, "loss": 0.3277, "step": 1621 }, { "epoch": 1.3603578417668438, "grad_norm": 0.3432155680365179, "learning_rate": 6.652382565075048e-06, "loss": 0.3031, "step": 1622 }, { "epoch": 1.3611965334078837, "grad_norm": 0.40931783835394403, "learning_rate": 6.647774755687239e-06, "loss": 0.3861, "step": 1623 }, { "epoch": 1.3620352250489236, "grad_norm": 0.4196839001453514, "learning_rate": 6.6431653758445e-06, "loss": 0.3301, "step": 1624 }, { "epoch": 1.3628739166899637, "grad_norm": 0.42161248848332056, "learning_rate": 6.638554429939926e-06, "loss": 0.3343, "step": 1625 }, { "epoch": 1.3637126083310036, "grad_norm": 0.3548718789448241, "learning_rate": 6.633941922368099e-06, "loss": 0.335, "step": 1626 }, { "epoch": 1.3645512999720437, "grad_norm": 0.367944947835639, "learning_rate": 6.6293278575250875e-06, "loss": 0.3039, "step": 1627 }, { "epoch": 1.3653899916130836, "grad_norm": 0.4136524141715357, "learning_rate": 6.624712239808451e-06, "loss": 0.3317, "step": 1628 }, { "epoch": 1.3662286832541235, "grad_norm": 0.4128011092542424, "learning_rate": 6.62009507361722e-06, "loss": 0.3441, "step": 1629 }, { "epoch": 1.3670673748951636, "grad_norm": 0.38660254142123873, "learning_rate": 6.6154763633519095e-06, "loss": 0.353, "step": 1630 }, { "epoch": 1.3679060665362035, "grad_norm": 0.35973202909272717, "learning_rate": 6.610856113414501e-06, "loss": 0.3292, "step": 1631 }, { "epoch": 1.3687447581772436, "grad_norm": 0.36487258161320524, "learning_rate": 6.606234328208446e-06, "loss": 0.3085, "step": 1632 }, { "epoch": 1.3695834498182835, "grad_norm": 0.43714461853398096, "learning_rate": 6.601611012138655e-06, "loss": 0.3436, "step": 1633 }, { "epoch": 1.3704221414593234, "grad_norm": 0.42505937855309306, "learning_rate": 6.596986169611506e-06, "loss": 0.3495, "step": 1634 }, { "epoch": 1.3712608331003635, "grad_norm": 0.3997163610493674, "learning_rate": 6.592359805034823e-06, "loss": 0.3253, "step": 1635 }, { "epoch": 1.3720995247414034, "grad_norm": 0.39997994716608115, "learning_rate": 6.5877319228178815e-06, "loss": 0.3299, "step": 1636 }, { "epoch": 1.3729382163824435, "grad_norm": 0.37369843588850993, "learning_rate": 6.583102527371411e-06, "loss": 0.3157, "step": 1637 }, { "epoch": 1.3737769080234834, "grad_norm": 0.3848655702411657, "learning_rate": 6.578471623107574e-06, "loss": 0.3341, "step": 1638 }, { "epoch": 1.3746155996645233, "grad_norm": 0.3425038297578566, "learning_rate": 6.573839214439978e-06, "loss": 0.3123, "step": 1639 }, { "epoch": 1.3754542913055634, "grad_norm": 0.3996030837843356, "learning_rate": 6.569205305783663e-06, "loss": 0.3379, "step": 1640 }, { "epoch": 1.3762929829466033, "grad_norm": 0.402178141617903, "learning_rate": 6.5645699015550935e-06, "loss": 0.3621, "step": 1641 }, { "epoch": 1.3771316745876434, "grad_norm": 0.3554461328230579, "learning_rate": 6.559933006172162e-06, "loss": 0.3291, "step": 1642 }, { "epoch": 1.3779703662286833, "grad_norm": 0.34521405639164343, "learning_rate": 6.555294624054187e-06, "loss": 0.3153, "step": 1643 }, { "epoch": 1.3788090578697232, "grad_norm": 0.41177063459052327, "learning_rate": 6.5506547596219e-06, "loss": 0.3565, "step": 1644 }, { "epoch": 1.379647749510763, "grad_norm": 0.36404920160798077, "learning_rate": 6.546013417297442e-06, "loss": 0.2952, "step": 1645 }, { "epoch": 1.3804864411518032, "grad_norm": 0.36610347074367705, "learning_rate": 6.5413706015043685e-06, "loss": 0.3248, "step": 1646 }, { "epoch": 1.3813251327928433, "grad_norm": 0.4003819799429143, "learning_rate": 6.536726316667636e-06, "loss": 0.3124, "step": 1647 }, { "epoch": 1.3821638244338832, "grad_norm": 0.4052694946813739, "learning_rate": 6.532080567213605e-06, "loss": 0.3493, "step": 1648 }, { "epoch": 1.383002516074923, "grad_norm": 0.3876231241096745, "learning_rate": 6.527433357570022e-06, "loss": 0.333, "step": 1649 }, { "epoch": 1.383841207715963, "grad_norm": 0.4196512830313264, "learning_rate": 6.5227846921660375e-06, "loss": 0.3686, "step": 1650 }, { "epoch": 1.384679899357003, "grad_norm": 0.42223992302453967, "learning_rate": 6.518134575432184e-06, "loss": 0.2969, "step": 1651 }, { "epoch": 1.385518590998043, "grad_norm": 0.46857583601016684, "learning_rate": 6.513483011800376e-06, "loss": 0.334, "step": 1652 }, { "epoch": 1.386357282639083, "grad_norm": 0.3683227840518084, "learning_rate": 6.508830005703905e-06, "loss": 0.3056, "step": 1653 }, { "epoch": 1.387195974280123, "grad_norm": 0.45652691206088614, "learning_rate": 6.5041755615774445e-06, "loss": 0.3436, "step": 1654 }, { "epoch": 1.3880346659211629, "grad_norm": 0.3976848752705777, "learning_rate": 6.499519683857034e-06, "loss": 0.362, "step": 1655 }, { "epoch": 1.388873357562203, "grad_norm": 0.38633389716948097, "learning_rate": 6.4948623769800774e-06, "loss": 0.318, "step": 1656 }, { "epoch": 1.3897120492032429, "grad_norm": 0.39728676178064526, "learning_rate": 6.490203645385347e-06, "loss": 0.34, "step": 1657 }, { "epoch": 1.390550740844283, "grad_norm": 0.40522192288771186, "learning_rate": 6.485543493512967e-06, "loss": 0.3668, "step": 1658 }, { "epoch": 1.3913894324853229, "grad_norm": 0.3910904522943937, "learning_rate": 6.480881925804414e-06, "loss": 0.3549, "step": 1659 }, { "epoch": 1.3922281241263628, "grad_norm": 0.3839530108748216, "learning_rate": 6.476218946702524e-06, "loss": 0.2967, "step": 1660 }, { "epoch": 1.3930668157674029, "grad_norm": 0.4054066606100142, "learning_rate": 6.471554560651465e-06, "loss": 0.3578, "step": 1661 }, { "epoch": 1.3939055074084428, "grad_norm": 0.37407155804365155, "learning_rate": 6.466888772096757e-06, "loss": 0.3059, "step": 1662 }, { "epoch": 1.3947441990494829, "grad_norm": 0.3898664638997256, "learning_rate": 6.462221585485249e-06, "loss": 0.3351, "step": 1663 }, { "epoch": 1.3955828906905228, "grad_norm": 0.4062641230286891, "learning_rate": 6.457553005265129e-06, "loss": 0.3427, "step": 1664 }, { "epoch": 1.3964215823315627, "grad_norm": 0.4006977411310576, "learning_rate": 6.452883035885906e-06, "loss": 0.3606, "step": 1665 }, { "epoch": 1.3972602739726028, "grad_norm": 0.4075999939012909, "learning_rate": 6.44821168179842e-06, "loss": 0.3468, "step": 1666 }, { "epoch": 1.3980989656136427, "grad_norm": 0.3879034241639464, "learning_rate": 6.443538947454824e-06, "loss": 0.3062, "step": 1667 }, { "epoch": 1.3989376572546828, "grad_norm": 0.41451417693207027, "learning_rate": 6.4388648373085924e-06, "loss": 0.332, "step": 1668 }, { "epoch": 1.3997763488957227, "grad_norm": 0.4112452683462566, "learning_rate": 6.434189355814508e-06, "loss": 0.343, "step": 1669 }, { "epoch": 1.4006150405367626, "grad_norm": 0.4128410123380975, "learning_rate": 6.429512507428664e-06, "loss": 0.341, "step": 1670 }, { "epoch": 1.4014537321778027, "grad_norm": 0.397668493533338, "learning_rate": 6.424834296608446e-06, "loss": 0.3296, "step": 1671 }, { "epoch": 1.4022924238188426, "grad_norm": 0.3736700617265305, "learning_rate": 6.420154727812552e-06, "loss": 0.3294, "step": 1672 }, { "epoch": 1.4031311154598827, "grad_norm": 0.39259495638486447, "learning_rate": 6.4154738055009645e-06, "loss": 0.3378, "step": 1673 }, { "epoch": 1.4039698071009226, "grad_norm": 0.413365013245553, "learning_rate": 6.41079153413496e-06, "loss": 0.3495, "step": 1674 }, { "epoch": 1.4048084987419625, "grad_norm": 0.36932484182446207, "learning_rate": 6.4061079181771016e-06, "loss": 0.315, "step": 1675 }, { "epoch": 1.4056471903830026, "grad_norm": 0.3548621162623505, "learning_rate": 6.401422962091228e-06, "loss": 0.3104, "step": 1676 }, { "epoch": 1.4064858820240425, "grad_norm": 0.41486604368208824, "learning_rate": 6.396736670342465e-06, "loss": 0.3575, "step": 1677 }, { "epoch": 1.4073245736650826, "grad_norm": 0.4160241129974556, "learning_rate": 6.392049047397203e-06, "loss": 0.3438, "step": 1678 }, { "epoch": 1.4081632653061225, "grad_norm": 0.3800902300229071, "learning_rate": 6.387360097723102e-06, "loss": 0.3363, "step": 1679 }, { "epoch": 1.4090019569471623, "grad_norm": 0.38702428728967553, "learning_rate": 6.382669825789092e-06, "loss": 0.3577, "step": 1680 }, { "epoch": 1.4098406485882025, "grad_norm": 0.36471041006519683, "learning_rate": 6.377978236065356e-06, "loss": 0.3177, "step": 1681 }, { "epoch": 1.4106793402292424, "grad_norm": 0.3672360163030534, "learning_rate": 6.3732853330233405e-06, "loss": 0.3165, "step": 1682 }, { "epoch": 1.4115180318702825, "grad_norm": 0.4021497701358822, "learning_rate": 6.368591121135738e-06, "loss": 0.367, "step": 1683 }, { "epoch": 1.4123567235113224, "grad_norm": 0.34984466462682184, "learning_rate": 6.363895604876488e-06, "loss": 0.3085, "step": 1684 }, { "epoch": 1.4131954151523622, "grad_norm": 0.40436262236208376, "learning_rate": 6.35919878872078e-06, "loss": 0.382, "step": 1685 }, { "epoch": 1.4140341067934024, "grad_norm": 0.3811393280934649, "learning_rate": 6.354500677145032e-06, "loss": 0.3041, "step": 1686 }, { "epoch": 1.4148727984344422, "grad_norm": 0.40288558445449885, "learning_rate": 6.349801274626908e-06, "loss": 0.3413, "step": 1687 }, { "epoch": 1.4157114900754824, "grad_norm": 0.3532661078368844, "learning_rate": 6.345100585645294e-06, "loss": 0.3335, "step": 1688 }, { "epoch": 1.4165501817165222, "grad_norm": 0.42215828289632445, "learning_rate": 6.340398614680304e-06, "loss": 0.3622, "step": 1689 }, { "epoch": 1.4173888733575621, "grad_norm": 0.3652266918729961, "learning_rate": 6.335695366213277e-06, "loss": 0.3099, "step": 1690 }, { "epoch": 1.4182275649986023, "grad_norm": 0.43309202467576174, "learning_rate": 6.330990844726766e-06, "loss": 0.3785, "step": 1691 }, { "epoch": 1.4190662566396421, "grad_norm": 0.3178802064171834, "learning_rate": 6.326285054704538e-06, "loss": 0.298, "step": 1692 }, { "epoch": 1.4199049482806823, "grad_norm": 0.38897292322494903, "learning_rate": 6.321578000631572e-06, "loss": 0.3703, "step": 1693 }, { "epoch": 1.4207436399217221, "grad_norm": 0.38843956161332904, "learning_rate": 6.316869686994045e-06, "loss": 0.3456, "step": 1694 }, { "epoch": 1.421582331562762, "grad_norm": 0.37432650065478895, "learning_rate": 6.312160118279342e-06, "loss": 0.3305, "step": 1695 }, { "epoch": 1.4224210232038021, "grad_norm": 0.3858482232217306, "learning_rate": 6.307449298976042e-06, "loss": 0.3471, "step": 1696 }, { "epoch": 1.423259714844842, "grad_norm": 0.36982039268047207, "learning_rate": 6.302737233573911e-06, "loss": 0.3636, "step": 1697 }, { "epoch": 1.4240984064858822, "grad_norm": 0.4138995495848546, "learning_rate": 6.298023926563909e-06, "loss": 0.3365, "step": 1698 }, { "epoch": 1.424937098126922, "grad_norm": 0.43322845967717233, "learning_rate": 6.293309382438175e-06, "loss": 0.3742, "step": 1699 }, { "epoch": 1.425775789767962, "grad_norm": 0.3209386201968986, "learning_rate": 6.288593605690031e-06, "loss": 0.278, "step": 1700 }, { "epoch": 1.4266144814090018, "grad_norm": 0.38891343087377545, "learning_rate": 6.2838766008139715e-06, "loss": 0.364, "step": 1701 }, { "epoch": 1.427453173050042, "grad_norm": 0.3941479010605898, "learning_rate": 6.27915837230566e-06, "loss": 0.3643, "step": 1702 }, { "epoch": 1.428291864691082, "grad_norm": 0.39882234217033297, "learning_rate": 6.27443892466193e-06, "loss": 0.3411, "step": 1703 }, { "epoch": 1.429130556332122, "grad_norm": 0.3815197627799229, "learning_rate": 6.269718262380771e-06, "loss": 0.3306, "step": 1704 }, { "epoch": 1.4299692479731618, "grad_norm": 0.42599126390437253, "learning_rate": 6.264996389961339e-06, "loss": 0.3626, "step": 1705 }, { "epoch": 1.4308079396142017, "grad_norm": 0.35280869003159515, "learning_rate": 6.260273311903935e-06, "loss": 0.3149, "step": 1706 }, { "epoch": 1.4316466312552418, "grad_norm": 0.3902200377128789, "learning_rate": 6.255549032710012e-06, "loss": 0.3378, "step": 1707 }, { "epoch": 1.4324853228962817, "grad_norm": 0.3734757391272817, "learning_rate": 6.250823556882171e-06, "loss": 0.3177, "step": 1708 }, { "epoch": 1.4333240145373218, "grad_norm": 0.36401250125652385, "learning_rate": 6.246096888924149e-06, "loss": 0.3085, "step": 1709 }, { "epoch": 1.4341627061783617, "grad_norm": 0.3975051412059208, "learning_rate": 6.241369033340818e-06, "loss": 0.3358, "step": 1710 }, { "epoch": 1.4350013978194016, "grad_norm": 0.3903548694016727, "learning_rate": 6.236639994638189e-06, "loss": 0.3539, "step": 1711 }, { "epoch": 1.4358400894604417, "grad_norm": 0.41227780620464083, "learning_rate": 6.231909777323396e-06, "loss": 0.3382, "step": 1712 }, { "epoch": 1.4366787811014816, "grad_norm": 0.35838663280445554, "learning_rate": 6.227178385904695e-06, "loss": 0.3281, "step": 1713 }, { "epoch": 1.4375174727425217, "grad_norm": 0.38932240102760557, "learning_rate": 6.2224458248914625e-06, "loss": 0.3375, "step": 1714 }, { "epoch": 1.4383561643835616, "grad_norm": 0.35279397892008485, "learning_rate": 6.217712098794189e-06, "loss": 0.3061, "step": 1715 }, { "epoch": 1.4391948560246015, "grad_norm": 0.37310941904497896, "learning_rate": 6.2129772121244795e-06, "loss": 0.3377, "step": 1716 }, { "epoch": 1.4400335476656416, "grad_norm": 0.39237448305535166, "learning_rate": 6.208241169395038e-06, "loss": 0.3708, "step": 1717 }, { "epoch": 1.4408722393066815, "grad_norm": 0.3852498831081042, "learning_rate": 6.203503975119678e-06, "loss": 0.3246, "step": 1718 }, { "epoch": 1.4417109309477216, "grad_norm": 0.35555697098812855, "learning_rate": 6.198765633813303e-06, "loss": 0.3431, "step": 1719 }, { "epoch": 1.4425496225887615, "grad_norm": 0.33959128366833125, "learning_rate": 6.1940261499919164e-06, "loss": 0.3109, "step": 1720 }, { "epoch": 1.4433883142298014, "grad_norm": 0.3868446649669096, "learning_rate": 6.189285528172607e-06, "loss": 0.3277, "step": 1721 }, { "epoch": 1.4442270058708415, "grad_norm": 0.3552923401936332, "learning_rate": 6.1845437728735466e-06, "loss": 0.3289, "step": 1722 }, { "epoch": 1.4450656975118814, "grad_norm": 0.3569178243213375, "learning_rate": 6.179800888613992e-06, "loss": 0.3053, "step": 1723 }, { "epoch": 1.4459043891529215, "grad_norm": 0.3830199106040434, "learning_rate": 6.17505687991427e-06, "loss": 0.33, "step": 1724 }, { "epoch": 1.4467430807939614, "grad_norm": 0.3860769127379725, "learning_rate": 6.170311751295786e-06, "loss": 0.3651, "step": 1725 }, { "epoch": 1.4475817724350013, "grad_norm": 0.34220970669343775, "learning_rate": 6.1655655072810085e-06, "loss": 0.2941, "step": 1726 }, { "epoch": 1.4484204640760414, "grad_norm": 0.3788339239003601, "learning_rate": 6.160818152393468e-06, "loss": 0.3271, "step": 1727 }, { "epoch": 1.4492591557170813, "grad_norm": 0.37739277016864414, "learning_rate": 6.156069691157757e-06, "loss": 0.3467, "step": 1728 }, { "epoch": 1.4500978473581214, "grad_norm": 0.3475383812845217, "learning_rate": 6.15132012809952e-06, "loss": 0.3059, "step": 1729 }, { "epoch": 1.4509365389991613, "grad_norm": 0.3795662768053155, "learning_rate": 6.146569467745453e-06, "loss": 0.3527, "step": 1730 }, { "epoch": 1.4517752306402012, "grad_norm": 0.40864185652770085, "learning_rate": 6.141817714623299e-06, "loss": 0.3428, "step": 1731 }, { "epoch": 1.4526139222812413, "grad_norm": 0.44051401153436043, "learning_rate": 6.137064873261839e-06, "loss": 0.3464, "step": 1732 }, { "epoch": 1.4534526139222812, "grad_norm": 0.35562795409836234, "learning_rate": 6.1323109481908925e-06, "loss": 0.3285, "step": 1733 }, { "epoch": 1.4542913055633213, "grad_norm": 0.4018182316606766, "learning_rate": 6.127555943941315e-06, "loss": 0.3534, "step": 1734 }, { "epoch": 1.4551299972043612, "grad_norm": 0.3443686420416655, "learning_rate": 6.122799865044985e-06, "loss": 0.2982, "step": 1735 }, { "epoch": 1.455968688845401, "grad_norm": 0.3795606775495842, "learning_rate": 6.1180427160348114e-06, "loss": 0.3649, "step": 1736 }, { "epoch": 1.4568073804864412, "grad_norm": 0.3465806582196444, "learning_rate": 6.113284501444715e-06, "loss": 0.3113, "step": 1737 }, { "epoch": 1.457646072127481, "grad_norm": 0.3871050424950981, "learning_rate": 6.108525225809642e-06, "loss": 0.3045, "step": 1738 }, { "epoch": 1.4584847637685212, "grad_norm": 0.43802672253508046, "learning_rate": 6.103764893665542e-06, "loss": 0.424, "step": 1739 }, { "epoch": 1.459323455409561, "grad_norm": 0.35448688761765, "learning_rate": 6.099003509549376e-06, "loss": 0.3089, "step": 1740 }, { "epoch": 1.460162147050601, "grad_norm": 0.36399125922017733, "learning_rate": 6.094241077999103e-06, "loss": 0.3114, "step": 1741 }, { "epoch": 1.461000838691641, "grad_norm": 0.39986729736424353, "learning_rate": 6.089477603553685e-06, "loss": 0.3736, "step": 1742 }, { "epoch": 1.461839530332681, "grad_norm": 0.3693841955213201, "learning_rate": 6.0847130907530786e-06, "loss": 0.3025, "step": 1743 }, { "epoch": 1.462678221973721, "grad_norm": 0.3590602812659985, "learning_rate": 6.079947544138225e-06, "loss": 0.3193, "step": 1744 }, { "epoch": 1.463516913614761, "grad_norm": 0.44159754455274447, "learning_rate": 6.075180968251052e-06, "loss": 0.3124, "step": 1745 }, { "epoch": 1.4643556052558009, "grad_norm": 0.3753270575088682, "learning_rate": 6.070413367634474e-06, "loss": 0.3074, "step": 1746 }, { "epoch": 1.465194296896841, "grad_norm": 0.43187349764668204, "learning_rate": 6.065644746832373e-06, "loss": 0.3784, "step": 1747 }, { "epoch": 1.4660329885378809, "grad_norm": 0.35440679079541504, "learning_rate": 6.060875110389611e-06, "loss": 0.3197, "step": 1748 }, { "epoch": 1.466871680178921, "grad_norm": 0.4104176350333504, "learning_rate": 6.056104462852015e-06, "loss": 0.3568, "step": 1749 }, { "epoch": 1.467710371819961, "grad_norm": 0.3727470236505403, "learning_rate": 6.051332808766374e-06, "loss": 0.3228, "step": 1750 }, { "epoch": 1.4685490634610008, "grad_norm": 0.3784881529341881, "learning_rate": 6.0465601526804395e-06, "loss": 0.312, "step": 1751 }, { "epoch": 1.469387755102041, "grad_norm": 0.3797159846211502, "learning_rate": 6.041786499142916e-06, "loss": 0.3319, "step": 1752 }, { "epoch": 1.4702264467430808, "grad_norm": 0.3887223163746989, "learning_rate": 6.0370118527034585e-06, "loss": 0.3354, "step": 1753 }, { "epoch": 1.471065138384121, "grad_norm": 0.37902385534413885, "learning_rate": 6.0322362179126695e-06, "loss": 0.3276, "step": 1754 }, { "epoch": 1.4719038300251608, "grad_norm": 0.3873800047691687, "learning_rate": 6.027459599322094e-06, "loss": 0.3261, "step": 1755 }, { "epoch": 1.4727425216662007, "grad_norm": 0.36068041655028377, "learning_rate": 6.0226820014842125e-06, "loss": 0.3325, "step": 1756 }, { "epoch": 1.4735812133072406, "grad_norm": 0.3502825765910772, "learning_rate": 6.017903428952441e-06, "loss": 0.3476, "step": 1757 }, { "epoch": 1.4744199049482807, "grad_norm": 0.37719274916988715, "learning_rate": 6.013123886281121e-06, "loss": 0.3234, "step": 1758 }, { "epoch": 1.4752585965893208, "grad_norm": 0.37434986320541724, "learning_rate": 6.008343378025522e-06, "loss": 0.3182, "step": 1759 }, { "epoch": 1.4760972882303607, "grad_norm": 0.3822264517286762, "learning_rate": 6.003561908741833e-06, "loss": 0.3405, "step": 1760 }, { "epoch": 1.4769359798714006, "grad_norm": 0.3866438174935489, "learning_rate": 5.998779482987159e-06, "loss": 0.3385, "step": 1761 }, { "epoch": 1.4777746715124405, "grad_norm": 0.34966290795692134, "learning_rate": 5.993996105319516e-06, "loss": 0.3138, "step": 1762 }, { "epoch": 1.4786133631534806, "grad_norm": 0.4291322589331104, "learning_rate": 5.989211780297827e-06, "loss": 0.3621, "step": 1763 }, { "epoch": 1.4794520547945205, "grad_norm": 0.3594252576054584, "learning_rate": 5.984426512481919e-06, "loss": 0.2973, "step": 1764 }, { "epoch": 1.4802907464355606, "grad_norm": 0.3949417706119665, "learning_rate": 5.979640306432517e-06, "loss": 0.3524, "step": 1765 }, { "epoch": 1.4811294380766005, "grad_norm": 0.36725925640285795, "learning_rate": 5.974853166711238e-06, "loss": 0.3184, "step": 1766 }, { "epoch": 1.4819681297176404, "grad_norm": 0.3673213518390183, "learning_rate": 5.970065097880593e-06, "loss": 0.3136, "step": 1767 }, { "epoch": 1.4828068213586805, "grad_norm": 0.4117291556945495, "learning_rate": 5.965276104503979e-06, "loss": 0.3529, "step": 1768 }, { "epoch": 1.4836455129997204, "grad_norm": 0.3717853757657988, "learning_rate": 5.960486191145667e-06, "loss": 0.3276, "step": 1769 }, { "epoch": 1.4844842046407605, "grad_norm": 0.36329673623317515, "learning_rate": 5.955695362370813e-06, "loss": 0.3404, "step": 1770 }, { "epoch": 1.4853228962818004, "grad_norm": 0.3832279212290885, "learning_rate": 5.950903622745437e-06, "loss": 0.3307, "step": 1771 }, { "epoch": 1.4861615879228403, "grad_norm": 0.3719803598752646, "learning_rate": 5.946110976836439e-06, "loss": 0.334, "step": 1772 }, { "epoch": 1.4870002795638804, "grad_norm": 0.34049397942979187, "learning_rate": 5.941317429211571e-06, "loss": 0.3247, "step": 1773 }, { "epoch": 1.4878389712049203, "grad_norm": 0.35431074496483594, "learning_rate": 5.93652298443945e-06, "loss": 0.3658, "step": 1774 }, { "epoch": 1.4886776628459604, "grad_norm": 0.38370376826395663, "learning_rate": 5.931727647089549e-06, "loss": 0.3357, "step": 1775 }, { "epoch": 1.4895163544870003, "grad_norm": 0.3564616040678334, "learning_rate": 5.9269314217321875e-06, "loss": 0.341, "step": 1776 }, { "epoch": 1.4903550461280402, "grad_norm": 0.4111041708059229, "learning_rate": 5.922134312938537e-06, "loss": 0.3341, "step": 1777 }, { "epoch": 1.4911937377690803, "grad_norm": 0.3763524934204799, "learning_rate": 5.917336325280605e-06, "loss": 0.3193, "step": 1778 }, { "epoch": 1.4920324294101202, "grad_norm": 0.41852125612343866, "learning_rate": 5.912537463331238e-06, "loss": 0.3294, "step": 1779 }, { "epoch": 1.4928711210511603, "grad_norm": 0.4337657912537945, "learning_rate": 5.907737731664121e-06, "loss": 0.382, "step": 1780 }, { "epoch": 1.4937098126922002, "grad_norm": 0.3827097580550891, "learning_rate": 5.902937134853764e-06, "loss": 0.328, "step": 1781 }, { "epoch": 1.49454850433324, "grad_norm": 0.3925304424827078, "learning_rate": 5.8981356774755e-06, "loss": 0.3557, "step": 1782 }, { "epoch": 1.4953871959742802, "grad_norm": 0.35654251193462133, "learning_rate": 5.893333364105484e-06, "loss": 0.3138, "step": 1783 }, { "epoch": 1.49622588761532, "grad_norm": 0.36550577405378426, "learning_rate": 5.888530199320685e-06, "loss": 0.3254, "step": 1784 }, { "epoch": 1.4970645792563602, "grad_norm": 0.3907869522849275, "learning_rate": 5.883726187698887e-06, "loss": 0.341, "step": 1785 }, { "epoch": 1.4979032708974, "grad_norm": 0.3804928518100367, "learning_rate": 5.87892133381868e-06, "loss": 0.321, "step": 1786 }, { "epoch": 1.49874196253844, "grad_norm": 0.38833550849068826, "learning_rate": 5.8741156422594555e-06, "loss": 0.329, "step": 1787 }, { "epoch": 1.49958065417948, "grad_norm": 0.39941463705618124, "learning_rate": 5.8693091176014025e-06, "loss": 0.2979, "step": 1788 }, { "epoch": 1.50041934582052, "grad_norm": 0.4008748429524392, "learning_rate": 5.8645017644255065e-06, "loss": 0.3498, "step": 1789 }, { "epoch": 1.50125803746156, "grad_norm": 0.31229953280843065, "learning_rate": 5.859693587313542e-06, "loss": 0.2981, "step": 1790 }, { "epoch": 1.5020967291026, "grad_norm": 0.42173241099519737, "learning_rate": 5.8548845908480645e-06, "loss": 0.3846, "step": 1791 }, { "epoch": 1.5029354207436398, "grad_norm": 0.3762622574511806, "learning_rate": 5.8500747796124194e-06, "loss": 0.3045, "step": 1792 }, { "epoch": 1.50377411238468, "grad_norm": 0.3440432124905993, "learning_rate": 5.84526415819072e-06, "loss": 0.3164, "step": 1793 }, { "epoch": 1.5046128040257198, "grad_norm": 0.35399938306942225, "learning_rate": 5.840452731167856e-06, "loss": 0.3263, "step": 1794 }, { "epoch": 1.50545149566676, "grad_norm": 0.36974113565308436, "learning_rate": 5.835640503129485e-06, "loss": 0.3302, "step": 1795 }, { "epoch": 1.5062901873077998, "grad_norm": 0.4016723492807966, "learning_rate": 5.830827478662027e-06, "loss": 0.3752, "step": 1796 }, { "epoch": 1.5071288789488397, "grad_norm": 0.36264329044644583, "learning_rate": 5.8260136623526575e-06, "loss": 0.3216, "step": 1797 }, { "epoch": 1.5079675705898796, "grad_norm": 0.37003630770119794, "learning_rate": 5.821199058789315e-06, "loss": 0.3427, "step": 1798 }, { "epoch": 1.5088062622309197, "grad_norm": 0.36732289095340676, "learning_rate": 5.8163836725606816e-06, "loss": 0.3228, "step": 1799 }, { "epoch": 1.5096449538719598, "grad_norm": 0.3481688789898968, "learning_rate": 5.811567508256188e-06, "loss": 0.3324, "step": 1800 }, { "epoch": 1.5104836455129997, "grad_norm": 0.36515511440122883, "learning_rate": 5.8067505704660054e-06, "loss": 0.3278, "step": 1801 }, { "epoch": 1.5113223371540396, "grad_norm": 0.39207749412053333, "learning_rate": 5.801932863781044e-06, "loss": 0.3285, "step": 1802 }, { "epoch": 1.5121610287950795, "grad_norm": 0.3652749844374837, "learning_rate": 5.797114392792942e-06, "loss": 0.351, "step": 1803 }, { "epoch": 1.5129997204361196, "grad_norm": 0.35163728161760494, "learning_rate": 5.792295162094073e-06, "loss": 0.3175, "step": 1804 }, { "epoch": 1.5138384120771597, "grad_norm": 0.41311595833944004, "learning_rate": 5.7874751762775285e-06, "loss": 0.3495, "step": 1805 }, { "epoch": 1.5146771037181996, "grad_norm": 0.36009997369377317, "learning_rate": 5.782654439937124e-06, "loss": 0.3004, "step": 1806 }, { "epoch": 1.5155157953592395, "grad_norm": 0.36589030093477226, "learning_rate": 5.77783295766739e-06, "loss": 0.3419, "step": 1807 }, { "epoch": 1.5163544870002794, "grad_norm": 0.37462288455174725, "learning_rate": 5.773010734063564e-06, "loss": 0.3063, "step": 1808 }, { "epoch": 1.5171931786413195, "grad_norm": 0.38707627106626086, "learning_rate": 5.768187773721592e-06, "loss": 0.3604, "step": 1809 }, { "epoch": 1.5180318702823596, "grad_norm": 0.36557907139374074, "learning_rate": 5.763364081238125e-06, "loss": 0.318, "step": 1810 }, { "epoch": 1.5188705619233995, "grad_norm": 0.36489321577104017, "learning_rate": 5.7585396612105095e-06, "loss": 0.299, "step": 1811 }, { "epoch": 1.5197092535644394, "grad_norm": 0.3868651680526864, "learning_rate": 5.753714518236786e-06, "loss": 0.3702, "step": 1812 }, { "epoch": 1.5205479452054793, "grad_norm": 0.35711467902172206, "learning_rate": 5.748888656915681e-06, "loss": 0.304, "step": 1813 }, { "epoch": 1.5213866368465194, "grad_norm": 0.4091263729170678, "learning_rate": 5.744062081846608e-06, "loss": 0.3438, "step": 1814 }, { "epoch": 1.5222253284875595, "grad_norm": 0.33049078852221925, "learning_rate": 5.739234797629665e-06, "loss": 0.3038, "step": 1815 }, { "epoch": 1.5230640201285994, "grad_norm": 0.3811882676778704, "learning_rate": 5.734406808865618e-06, "loss": 0.3502, "step": 1816 }, { "epoch": 1.5239027117696393, "grad_norm": 0.3614727163320119, "learning_rate": 5.7295781201559095e-06, "loss": 0.3236, "step": 1817 }, { "epoch": 1.5247414034106792, "grad_norm": 0.38138348185058135, "learning_rate": 5.724748736102648e-06, "loss": 0.3389, "step": 1818 }, { "epoch": 1.5255800950517193, "grad_norm": 0.379523339096977, "learning_rate": 5.719918661308604e-06, "loss": 0.3449, "step": 1819 }, { "epoch": 1.5264187866927594, "grad_norm": 0.3687190671946007, "learning_rate": 5.715087900377208e-06, "loss": 0.3614, "step": 1820 }, { "epoch": 1.5272574783337993, "grad_norm": 0.38234964891384127, "learning_rate": 5.710256457912542e-06, "loss": 0.3111, "step": 1821 }, { "epoch": 1.5280961699748392, "grad_norm": 0.39024696914050633, "learning_rate": 5.705424338519338e-06, "loss": 0.3832, "step": 1822 }, { "epoch": 1.528934861615879, "grad_norm": 0.3237877405312159, "learning_rate": 5.700591546802977e-06, "loss": 0.3118, "step": 1823 }, { "epoch": 1.5297735532569192, "grad_norm": 0.36932958690745404, "learning_rate": 5.6957580873694765e-06, "loss": 0.3466, "step": 1824 }, { "epoch": 1.5306122448979593, "grad_norm": 0.3817816778336724, "learning_rate": 5.690923964825492e-06, "loss": 0.322, "step": 1825 }, { "epoch": 1.5314509365389992, "grad_norm": 0.3755713374573333, "learning_rate": 5.6860891837783105e-06, "loss": 0.3272, "step": 1826 }, { "epoch": 1.532289628180039, "grad_norm": 0.36681027292810053, "learning_rate": 5.681253748835844e-06, "loss": 0.3534, "step": 1827 }, { "epoch": 1.533128319821079, "grad_norm": 0.3415139132154272, "learning_rate": 5.676417664606636e-06, "loss": 0.3088, "step": 1828 }, { "epoch": 1.5339670114621191, "grad_norm": 0.3828122406759068, "learning_rate": 5.67158093569984e-06, "loss": 0.3178, "step": 1829 }, { "epoch": 1.5348057031031592, "grad_norm": 0.4044036424851614, "learning_rate": 5.666743566725227e-06, "loss": 0.347, "step": 1830 }, { "epoch": 1.5356443947441991, "grad_norm": 0.37657297473458073, "learning_rate": 5.6619055622931806e-06, "loss": 0.3133, "step": 1831 }, { "epoch": 1.536483086385239, "grad_norm": 0.38957839088388274, "learning_rate": 5.657066927014684e-06, "loss": 0.3552, "step": 1832 }, { "epoch": 1.537321778026279, "grad_norm": 0.3491523301026894, "learning_rate": 5.65222766550133e-06, "loss": 0.3292, "step": 1833 }, { "epoch": 1.538160469667319, "grad_norm": 0.3437169625668065, "learning_rate": 5.647387782365299e-06, "loss": 0.3001, "step": 1834 }, { "epoch": 1.5389991613083591, "grad_norm": 0.3517656784183983, "learning_rate": 5.642547282219373e-06, "loss": 0.3479, "step": 1835 }, { "epoch": 1.539837852949399, "grad_norm": 0.3548370374720716, "learning_rate": 5.637706169676913e-06, "loss": 0.3137, "step": 1836 }, { "epoch": 1.540676544590439, "grad_norm": 0.3762973797000973, "learning_rate": 5.632864449351873e-06, "loss": 0.3515, "step": 1837 }, { "epoch": 1.5415152362314788, "grad_norm": 0.3986894383349687, "learning_rate": 5.628022125858778e-06, "loss": 0.326, "step": 1838 }, { "epoch": 1.542353927872519, "grad_norm": 0.3340663316926016, "learning_rate": 5.623179203812733e-06, "loss": 0.3147, "step": 1839 }, { "epoch": 1.5431926195135588, "grad_norm": 0.3762125704951095, "learning_rate": 5.61833568782941e-06, "loss": 0.3557, "step": 1840 }, { "epoch": 1.544031311154599, "grad_norm": 0.39432481525206486, "learning_rate": 5.61349158252505e-06, "loss": 0.3628, "step": 1841 }, { "epoch": 1.5448700027956388, "grad_norm": 0.37593243960568345, "learning_rate": 5.608646892516456e-06, "loss": 0.3282, "step": 1842 }, { "epoch": 1.5457086944366787, "grad_norm": 0.34082283541623387, "learning_rate": 5.6038016224209845e-06, "loss": 0.3062, "step": 1843 }, { "epoch": 1.5465473860777188, "grad_norm": 0.4009590687798213, "learning_rate": 5.598955776856549e-06, "loss": 0.3631, "step": 1844 }, { "epoch": 1.5473860777187587, "grad_norm": 0.34408010125969063, "learning_rate": 5.594109360441609e-06, "loss": 0.3259, "step": 1845 }, { "epoch": 1.5482247693597988, "grad_norm": 0.3769158081770086, "learning_rate": 5.589262377795167e-06, "loss": 0.3446, "step": 1846 }, { "epoch": 1.5490634610008387, "grad_norm": 0.3379972964676502, "learning_rate": 5.584414833536768e-06, "loss": 0.3174, "step": 1847 }, { "epoch": 1.5499021526418786, "grad_norm": 0.36391772652433485, "learning_rate": 5.5795667322864944e-06, "loss": 0.3255, "step": 1848 }, { "epoch": 1.5507408442829187, "grad_norm": 0.38081695041602337, "learning_rate": 5.574718078664951e-06, "loss": 0.3459, "step": 1849 }, { "epoch": 1.5515795359239586, "grad_norm": 0.356361880896244, "learning_rate": 5.569868877293277e-06, "loss": 0.3274, "step": 1850 }, { "epoch": 1.5524182275649987, "grad_norm": 0.3299017224080633, "learning_rate": 5.565019132793132e-06, "loss": 0.3146, "step": 1851 }, { "epoch": 1.5532569192060386, "grad_norm": 0.3747358047116327, "learning_rate": 5.560168849786689e-06, "loss": 0.3482, "step": 1852 }, { "epoch": 1.5540956108470785, "grad_norm": 0.3294860565730922, "learning_rate": 5.55531803289664e-06, "loss": 0.297, "step": 1853 }, { "epoch": 1.5549343024881184, "grad_norm": 0.35706836122027846, "learning_rate": 5.55046668674618e-06, "loss": 0.3759, "step": 1854 }, { "epoch": 1.5557729941291585, "grad_norm": 0.3915130687577438, "learning_rate": 5.545614815959015e-06, "loss": 0.3015, "step": 1855 }, { "epoch": 1.5566116857701986, "grad_norm": 0.35120957847941603, "learning_rate": 5.540762425159347e-06, "loss": 0.3351, "step": 1856 }, { "epoch": 1.5574503774112385, "grad_norm": 0.41206780351536215, "learning_rate": 5.5359095189718736e-06, "loss": 0.3578, "step": 1857 }, { "epoch": 1.5582890690522784, "grad_norm": 0.3982097532690349, "learning_rate": 5.531056102021784e-06, "loss": 0.3349, "step": 1858 }, { "epoch": 1.5591277606933183, "grad_norm": 0.38634568724919954, "learning_rate": 5.526202178934753e-06, "loss": 0.3257, "step": 1859 }, { "epoch": 1.5599664523343584, "grad_norm": 0.4060090077717636, "learning_rate": 5.521347754336943e-06, "loss": 0.3464, "step": 1860 }, { "epoch": 1.5608051439753985, "grad_norm": 0.36691284269227387, "learning_rate": 5.516492832854989e-06, "loss": 0.3627, "step": 1861 }, { "epoch": 1.5616438356164384, "grad_norm": 0.3553251340184088, "learning_rate": 5.511637419116e-06, "loss": 0.3184, "step": 1862 }, { "epoch": 1.5624825272574783, "grad_norm": 0.3844666427637523, "learning_rate": 5.506781517747557e-06, "loss": 0.3213, "step": 1863 }, { "epoch": 1.5633212188985182, "grad_norm": 0.3820875034174032, "learning_rate": 5.501925133377702e-06, "loss": 0.3663, "step": 1864 }, { "epoch": 1.5641599105395583, "grad_norm": 0.356496632863223, "learning_rate": 5.497068270634942e-06, "loss": 0.3168, "step": 1865 }, { "epoch": 1.5649986021805984, "grad_norm": 0.3886497887099178, "learning_rate": 5.492210934148235e-06, "loss": 0.3594, "step": 1866 }, { "epoch": 1.5658372938216383, "grad_norm": 0.3631061022548458, "learning_rate": 5.487353128546994e-06, "loss": 0.3277, "step": 1867 }, { "epoch": 1.5666759854626782, "grad_norm": 0.3909527213736203, "learning_rate": 5.482494858461079e-06, "loss": 0.3304, "step": 1868 }, { "epoch": 1.567514677103718, "grad_norm": 0.39681378242289056, "learning_rate": 5.47763612852079e-06, "loss": 0.3071, "step": 1869 }, { "epoch": 1.5683533687447582, "grad_norm": 0.4112951047636464, "learning_rate": 5.472776943356867e-06, "loss": 0.3846, "step": 1870 }, { "epoch": 1.5691920603857983, "grad_norm": 0.34008041299965436, "learning_rate": 5.467917307600485e-06, "loss": 0.3033, "step": 1871 }, { "epoch": 1.5700307520268382, "grad_norm": 0.34495329447732015, "learning_rate": 5.463057225883246e-06, "loss": 0.3005, "step": 1872 }, { "epoch": 1.570869443667878, "grad_norm": 0.3671884323585423, "learning_rate": 5.458196702837179e-06, "loss": 0.3221, "step": 1873 }, { "epoch": 1.571708135308918, "grad_norm": 0.3857232935527896, "learning_rate": 5.453335743094734e-06, "loss": 0.3269, "step": 1874 }, { "epoch": 1.572546826949958, "grad_norm": 0.39724821095795554, "learning_rate": 5.448474351288774e-06, "loss": 0.3186, "step": 1875 }, { "epoch": 1.5733855185909982, "grad_norm": 0.40992991489881875, "learning_rate": 5.443612532052577e-06, "loss": 0.3485, "step": 1876 }, { "epoch": 1.574224210232038, "grad_norm": 0.35857874134402934, "learning_rate": 5.4387502900198276e-06, "loss": 0.3408, "step": 1877 }, { "epoch": 1.575062901873078, "grad_norm": 0.3827305536308299, "learning_rate": 5.43388762982461e-06, "loss": 0.3006, "step": 1878 }, { "epoch": 1.5759015935141178, "grad_norm": 0.45506884644581586, "learning_rate": 5.429024556101414e-06, "loss": 0.366, "step": 1879 }, { "epoch": 1.576740285155158, "grad_norm": 0.38155624546535233, "learning_rate": 5.424161073485119e-06, "loss": 0.338, "step": 1880 }, { "epoch": 1.577578976796198, "grad_norm": 0.3339719665431755, "learning_rate": 5.4192971866109925e-06, "loss": 0.3089, "step": 1881 }, { "epoch": 1.578417668437238, "grad_norm": 0.40217661277606503, "learning_rate": 5.414432900114691e-06, "loss": 0.3314, "step": 1882 }, { "epoch": 1.5792563600782779, "grad_norm": 0.4354733860576515, "learning_rate": 5.4095682186322495e-06, "loss": 0.3448, "step": 1883 }, { "epoch": 1.5800950517193177, "grad_norm": 0.4124067926863641, "learning_rate": 5.404703146800079e-06, "loss": 0.372, "step": 1884 }, { "epoch": 1.5809337433603579, "grad_norm": 0.34474111648225353, "learning_rate": 5.399837689254967e-06, "loss": 0.2815, "step": 1885 }, { "epoch": 1.581772435001398, "grad_norm": 0.3659572007078123, "learning_rate": 5.3949718506340645e-06, "loss": 0.341, "step": 1886 }, { "epoch": 1.5826111266424379, "grad_norm": 0.42726206060571426, "learning_rate": 5.3901056355748835e-06, "loss": 0.3222, "step": 1887 }, { "epoch": 1.5834498182834778, "grad_norm": 0.4291928134432849, "learning_rate": 5.385239048715301e-06, "loss": 0.3287, "step": 1888 }, { "epoch": 1.5842885099245176, "grad_norm": 0.3831078664994417, "learning_rate": 5.380372094693544e-06, "loss": 0.3359, "step": 1889 }, { "epoch": 1.5851272015655578, "grad_norm": 0.3705042352091911, "learning_rate": 5.37550477814819e-06, "loss": 0.3224, "step": 1890 }, { "epoch": 1.5859658932065979, "grad_norm": 0.4788961468908488, "learning_rate": 5.370637103718164e-06, "loss": 0.3502, "step": 1891 }, { "epoch": 1.5868045848476378, "grad_norm": 0.4232117083096664, "learning_rate": 5.36576907604273e-06, "loss": 0.344, "step": 1892 }, { "epoch": 1.5876432764886776, "grad_norm": 0.3702835102264255, "learning_rate": 5.3609006997614884e-06, "loss": 0.3157, "step": 1893 }, { "epoch": 1.5884819681297175, "grad_norm": 0.4274531102895756, "learning_rate": 5.356031979514375e-06, "loss": 0.3437, "step": 1894 }, { "epoch": 1.5893206597707576, "grad_norm": 0.4201657899212173, "learning_rate": 5.351162919941651e-06, "loss": 0.3287, "step": 1895 }, { "epoch": 1.5901593514117978, "grad_norm": 0.4283583503295835, "learning_rate": 5.3462935256838996e-06, "loss": 0.3302, "step": 1896 }, { "epoch": 1.5909980430528377, "grad_norm": 0.38018151706396996, "learning_rate": 5.341423801382025e-06, "loss": 0.3425, "step": 1897 }, { "epoch": 1.5918367346938775, "grad_norm": 0.3991711683662137, "learning_rate": 5.336553751677247e-06, "loss": 0.3278, "step": 1898 }, { "epoch": 1.5926754263349174, "grad_norm": 0.38208578915994656, "learning_rate": 5.331683381211092e-06, "loss": 0.2963, "step": 1899 }, { "epoch": 1.5935141179759575, "grad_norm": 0.4157900939146267, "learning_rate": 5.3268126946253975e-06, "loss": 0.3425, "step": 1900 }, { "epoch": 1.5943528096169974, "grad_norm": 0.3399855677051213, "learning_rate": 5.321941696562295e-06, "loss": 0.3306, "step": 1901 }, { "epoch": 1.5951915012580375, "grad_norm": 0.3778925657473484, "learning_rate": 5.3170703916642205e-06, "loss": 0.3363, "step": 1902 }, { "epoch": 1.5960301928990774, "grad_norm": 0.3609009897476988, "learning_rate": 5.3121987845738966e-06, "loss": 0.3075, "step": 1903 }, { "epoch": 1.5968688845401173, "grad_norm": 0.38601676233545595, "learning_rate": 5.307326879934337e-06, "loss": 0.3247, "step": 1904 }, { "epoch": 1.5977075761811574, "grad_norm": 0.3854295402794258, "learning_rate": 5.302454682388837e-06, "loss": 0.3526, "step": 1905 }, { "epoch": 1.5985462678221973, "grad_norm": 0.36677596263540463, "learning_rate": 5.297582196580974e-06, "loss": 0.3108, "step": 1906 }, { "epoch": 1.5993849594632374, "grad_norm": 0.37479827240508384, "learning_rate": 5.292709427154598e-06, "loss": 0.3443, "step": 1907 }, { "epoch": 1.6002236511042773, "grad_norm": 0.39324026510327165, "learning_rate": 5.287836378753826e-06, "loss": 0.3592, "step": 1908 }, { "epoch": 1.6010623427453172, "grad_norm": 0.3491081788347801, "learning_rate": 5.28296305602305e-06, "loss": 0.3144, "step": 1909 }, { "epoch": 1.6019010343863573, "grad_norm": 0.37696131855419535, "learning_rate": 5.278089463606917e-06, "loss": 0.3374, "step": 1910 }, { "epoch": 1.6027397260273972, "grad_norm": 0.373098181463218, "learning_rate": 5.2732156061503295e-06, "loss": 0.3072, "step": 1911 }, { "epoch": 1.6035784176684373, "grad_norm": 0.35050780372159457, "learning_rate": 5.268341488298446e-06, "loss": 0.3352, "step": 1912 }, { "epoch": 1.6044171093094772, "grad_norm": 0.3862245828135779, "learning_rate": 5.263467114696673e-06, "loss": 0.314, "step": 1913 }, { "epoch": 1.6052558009505171, "grad_norm": 0.3925548883532242, "learning_rate": 5.2585924899906585e-06, "loss": 0.3357, "step": 1914 }, { "epoch": 1.606094492591557, "grad_norm": 0.3529620440524855, "learning_rate": 5.253717618826292e-06, "loss": 0.326, "step": 1915 }, { "epoch": 1.6069331842325971, "grad_norm": 0.4099351550898009, "learning_rate": 5.248842505849699e-06, "loss": 0.3635, "step": 1916 }, { "epoch": 1.6077718758736372, "grad_norm": 0.39366494126261214, "learning_rate": 5.243967155707232e-06, "loss": 0.339, "step": 1917 }, { "epoch": 1.6086105675146771, "grad_norm": 0.38943731245945745, "learning_rate": 5.2390915730454715e-06, "loss": 0.3258, "step": 1918 }, { "epoch": 1.609449259155717, "grad_norm": 0.3763048755597518, "learning_rate": 5.234215762511221e-06, "loss": 0.3406, "step": 1919 }, { "epoch": 1.610287950796757, "grad_norm": 0.40743570923322514, "learning_rate": 5.229339728751497e-06, "loss": 0.3775, "step": 1920 }, { "epoch": 1.611126642437797, "grad_norm": 0.37031363321092664, "learning_rate": 5.224463476413531e-06, "loss": 0.3399, "step": 1921 }, { "epoch": 1.6119653340788371, "grad_norm": 0.3445629708101783, "learning_rate": 5.219587010144766e-06, "loss": 0.3205, "step": 1922 }, { "epoch": 1.612804025719877, "grad_norm": 0.39910606761432044, "learning_rate": 5.214710334592845e-06, "loss": 0.3613, "step": 1923 }, { "epoch": 1.613642717360917, "grad_norm": 0.35926733219809787, "learning_rate": 5.20983345440561e-06, "loss": 0.3092, "step": 1924 }, { "epoch": 1.6144814090019568, "grad_norm": 0.3608716732080621, "learning_rate": 5.2049563742311016e-06, "loss": 0.3367, "step": 1925 }, { "epoch": 1.615320100642997, "grad_norm": 0.39598790977383985, "learning_rate": 5.200079098717548e-06, "loss": 0.3695, "step": 1926 }, { "epoch": 1.616158792284037, "grad_norm": 0.37956449481172083, "learning_rate": 5.195201632513363e-06, "loss": 0.3326, "step": 1927 }, { "epoch": 1.616997483925077, "grad_norm": 0.34094398773458423, "learning_rate": 5.190323980267144e-06, "loss": 0.3229, "step": 1928 }, { "epoch": 1.6178361755661168, "grad_norm": 0.34772799759795747, "learning_rate": 5.185446146627668e-06, "loss": 0.3054, "step": 1929 }, { "epoch": 1.6186748672071567, "grad_norm": 0.34421341229094865, "learning_rate": 5.180568136243881e-06, "loss": 0.3284, "step": 1930 }, { "epoch": 1.6195135588481968, "grad_norm": 0.3949791488113763, "learning_rate": 5.175689953764897e-06, "loss": 0.3566, "step": 1931 }, { "epoch": 1.620352250489237, "grad_norm": 0.37523696911641585, "learning_rate": 5.170811603839997e-06, "loss": 0.3231, "step": 1932 }, { "epoch": 1.6211909421302768, "grad_norm": 0.3695279668067954, "learning_rate": 5.16593309111862e-06, "loss": 0.3349, "step": 1933 }, { "epoch": 1.6220296337713167, "grad_norm": 0.3585378328268466, "learning_rate": 5.161054420250361e-06, "loss": 0.3215, "step": 1934 }, { "epoch": 1.6228683254123566, "grad_norm": 0.3681050160561461, "learning_rate": 5.156175595884964e-06, "loss": 0.3136, "step": 1935 }, { "epoch": 1.6237070170533967, "grad_norm": 0.3645514255103615, "learning_rate": 5.151296622672323e-06, "loss": 0.3261, "step": 1936 }, { "epoch": 1.6245457086944368, "grad_norm": 0.3622476850108262, "learning_rate": 5.146417505262469e-06, "loss": 0.3374, "step": 1937 }, { "epoch": 1.6253844003354767, "grad_norm": 0.3778055085946979, "learning_rate": 5.141538248305573e-06, "loss": 0.3336, "step": 1938 }, { "epoch": 1.6262230919765166, "grad_norm": 0.34716548149956167, "learning_rate": 5.136658856451938e-06, "loss": 0.2994, "step": 1939 }, { "epoch": 1.6270617836175565, "grad_norm": 0.4050627264551196, "learning_rate": 5.131779334351998e-06, "loss": 0.3534, "step": 1940 }, { "epoch": 1.6279004752585966, "grad_norm": 0.3475875303824395, "learning_rate": 5.126899686656309e-06, "loss": 0.3422, "step": 1941 }, { "epoch": 1.6287391668996367, "grad_norm": 0.3898159256109267, "learning_rate": 5.122019918015547e-06, "loss": 0.3444, "step": 1942 }, { "epoch": 1.6295778585406766, "grad_norm": 0.4243210059964642, "learning_rate": 5.117140033080504e-06, "loss": 0.3534, "step": 1943 }, { "epoch": 1.6304165501817165, "grad_norm": 0.3694091902016322, "learning_rate": 5.11226003650208e-06, "loss": 0.3246, "step": 1944 }, { "epoch": 1.6312552418227564, "grad_norm": 0.37171182264655067, "learning_rate": 5.1073799329312865e-06, "loss": 0.2998, "step": 1945 }, { "epoch": 1.6320939334637965, "grad_norm": 0.39606791109366646, "learning_rate": 5.102499727019233e-06, "loss": 0.3588, "step": 1946 }, { "epoch": 1.6329326251048366, "grad_norm": 0.4133705024944115, "learning_rate": 5.097619423417127e-06, "loss": 0.3281, "step": 1947 }, { "epoch": 1.6337713167458765, "grad_norm": 0.43391331844647657, "learning_rate": 5.092739026776271e-06, "loss": 0.3571, "step": 1948 }, { "epoch": 1.6346100083869164, "grad_norm": 0.4090019130052223, "learning_rate": 5.087858541748054e-06, "loss": 0.3251, "step": 1949 }, { "epoch": 1.6354487000279563, "grad_norm": 0.35756627781632805, "learning_rate": 5.082977972983952e-06, "loss": 0.3222, "step": 1950 }, { "epoch": 1.6362873916689964, "grad_norm": 0.4319507063494484, "learning_rate": 5.078097325135517e-06, "loss": 0.3183, "step": 1951 }, { "epoch": 1.6371260833100365, "grad_norm": 0.4247315857713405, "learning_rate": 5.073216602854377e-06, "loss": 0.3458, "step": 1952 }, { "epoch": 1.6379647749510764, "grad_norm": 0.3582174045188792, "learning_rate": 5.068335810792235e-06, "loss": 0.3174, "step": 1953 }, { "epoch": 1.6388034665921163, "grad_norm": 0.37147225732919775, "learning_rate": 5.063454953600858e-06, "loss": 0.3181, "step": 1954 }, { "epoch": 1.6396421582331562, "grad_norm": 0.41320131977972013, "learning_rate": 5.058574035932074e-06, "loss": 0.3066, "step": 1955 }, { "epoch": 1.6404808498741963, "grad_norm": 0.4309938876550921, "learning_rate": 5.053693062437771e-06, "loss": 0.3636, "step": 1956 }, { "epoch": 1.6413195415152362, "grad_norm": 0.36686184716309267, "learning_rate": 5.0488120377698845e-06, "loss": 0.3112, "step": 1957 }, { "epoch": 1.6421582331562763, "grad_norm": 0.3752810770515136, "learning_rate": 5.043930966580406e-06, "loss": 0.3655, "step": 1958 }, { "epoch": 1.6429969247973162, "grad_norm": 0.3641497663530708, "learning_rate": 5.03904985352137e-06, "loss": 0.3277, "step": 1959 }, { "epoch": 1.643835616438356, "grad_norm": 0.37247632740808917, "learning_rate": 5.034168703244847e-06, "loss": 0.3023, "step": 1960 }, { "epoch": 1.6446743080793962, "grad_norm": 0.3664622115659706, "learning_rate": 5.0292875204029445e-06, "loss": 0.3283, "step": 1961 }, { "epoch": 1.645512999720436, "grad_norm": 0.3518453680830088, "learning_rate": 5.024406309647804e-06, "loss": 0.3349, "step": 1962 }, { "epoch": 1.6463516913614762, "grad_norm": 0.35554309506636295, "learning_rate": 5.01952507563159e-06, "loss": 0.3428, "step": 1963 }, { "epoch": 1.647190383002516, "grad_norm": 0.3956759322745471, "learning_rate": 5.01464382300649e-06, "loss": 0.3381, "step": 1964 }, { "epoch": 1.648029074643556, "grad_norm": 0.35988664517550656, "learning_rate": 5.00976255642471e-06, "loss": 0.3092, "step": 1965 }, { "epoch": 1.648867766284596, "grad_norm": 0.3932740288251157, "learning_rate": 5.0048812805384715e-06, "loss": 0.3688, "step": 1966 }, { "epoch": 1.649706457925636, "grad_norm": 0.34323853174649943, "learning_rate": 5e-06, "loss": 0.2899, "step": 1967 }, { "epoch": 1.650545149566676, "grad_norm": 0.4051523844173709, "learning_rate": 4.995118719461528e-06, "loss": 0.3684, "step": 1968 }, { "epoch": 1.651383841207716, "grad_norm": 0.40622137488597304, "learning_rate": 4.990237443575291e-06, "loss": 0.3514, "step": 1969 }, { "epoch": 1.6522225328487559, "grad_norm": 0.33969729583142055, "learning_rate": 4.985356176993511e-06, "loss": 0.3266, "step": 1970 }, { "epoch": 1.6530612244897958, "grad_norm": 0.37499449946261904, "learning_rate": 4.980474924368413e-06, "loss": 0.3395, "step": 1971 }, { "epoch": 1.6538999161308359, "grad_norm": 0.36419629002227744, "learning_rate": 4.975593690352198e-06, "loss": 0.3097, "step": 1972 }, { "epoch": 1.654738607771876, "grad_norm": 0.35014489138379296, "learning_rate": 4.970712479597056e-06, "loss": 0.3174, "step": 1973 }, { "epoch": 1.6555772994129159, "grad_norm": 0.3447768306317903, "learning_rate": 4.965831296755156e-06, "loss": 0.3394, "step": 1974 }, { "epoch": 1.6564159910539558, "grad_norm": 0.3607715914116031, "learning_rate": 4.960950146478631e-06, "loss": 0.3171, "step": 1975 }, { "epoch": 1.6572546826949957, "grad_norm": 0.3470870409117193, "learning_rate": 4.9560690334195935e-06, "loss": 0.3244, "step": 1976 }, { "epoch": 1.6580933743360358, "grad_norm": 0.37994868289308603, "learning_rate": 4.951187962230117e-06, "loss": 0.3159, "step": 1977 }, { "epoch": 1.6589320659770759, "grad_norm": 0.3671692337818963, "learning_rate": 4.9463069375622304e-06, "loss": 0.3413, "step": 1978 }, { "epoch": 1.6597707576181158, "grad_norm": 0.3931394142440334, "learning_rate": 4.941425964067928e-06, "loss": 0.3357, "step": 1979 }, { "epoch": 1.6606094492591557, "grad_norm": 0.3475283654183467, "learning_rate": 4.9365450463991425e-06, "loss": 0.292, "step": 1980 }, { "epoch": 1.6614481409001955, "grad_norm": 0.4366495814059196, "learning_rate": 4.931664189207765e-06, "loss": 0.3892, "step": 1981 }, { "epoch": 1.6622868325412357, "grad_norm": 0.36124449954918886, "learning_rate": 4.926783397145624e-06, "loss": 0.2848, "step": 1982 }, { "epoch": 1.6631255241822758, "grad_norm": 0.3679868463526068, "learning_rate": 4.921902674864485e-06, "loss": 0.3489, "step": 1983 }, { "epoch": 1.6639642158233157, "grad_norm": 0.35572028541734146, "learning_rate": 4.9170220270160505e-06, "loss": 0.3323, "step": 1984 }, { "epoch": 1.6648029074643556, "grad_norm": 0.3742763918667232, "learning_rate": 4.912141458251947e-06, "loss": 0.3279, "step": 1985 }, { "epoch": 1.6656415991053954, "grad_norm": 0.3925005527078549, "learning_rate": 4.9072609732237296e-06, "loss": 0.3138, "step": 1986 }, { "epoch": 1.6664802907464356, "grad_norm": 0.4188006955265036, "learning_rate": 4.902380576582874e-06, "loss": 0.3438, "step": 1987 }, { "epoch": 1.6673189823874757, "grad_norm": 0.3670618605054817, "learning_rate": 4.897500272980769e-06, "loss": 0.3014, "step": 1988 }, { "epoch": 1.6681576740285156, "grad_norm": 0.4448511295308447, "learning_rate": 4.892620067068714e-06, "loss": 0.3904, "step": 1989 }, { "epoch": 1.6689963656695554, "grad_norm": 0.3539650082425224, "learning_rate": 4.887739963497921e-06, "loss": 0.3021, "step": 1990 }, { "epoch": 1.6698350573105953, "grad_norm": 0.36899601741633264, "learning_rate": 4.882859966919497e-06, "loss": 0.2934, "step": 1991 }, { "epoch": 1.6706737489516355, "grad_norm": 0.37393754400672646, "learning_rate": 4.877980081984455e-06, "loss": 0.3548, "step": 1992 }, { "epoch": 1.6715124405926756, "grad_norm": 0.35735500246623647, "learning_rate": 4.873100313343693e-06, "loss": 0.3385, "step": 1993 }, { "epoch": 1.6723511322337155, "grad_norm": 0.40104095469630147, "learning_rate": 4.868220665648003e-06, "loss": 0.3285, "step": 1994 }, { "epoch": 1.6731898238747553, "grad_norm": 0.4328016530177977, "learning_rate": 4.863341143548063e-06, "loss": 0.3431, "step": 1995 }, { "epoch": 1.6740285155157952, "grad_norm": 0.40271690003396543, "learning_rate": 4.858461751694428e-06, "loss": 0.3441, "step": 1996 }, { "epoch": 1.6748672071568353, "grad_norm": 0.3685927002623365, "learning_rate": 4.853582494737533e-06, "loss": 0.3421, "step": 1997 }, { "epoch": 1.6757058987978755, "grad_norm": 0.4291567954336801, "learning_rate": 4.848703377327679e-06, "loss": 0.3536, "step": 1998 }, { "epoch": 1.6765445904389153, "grad_norm": 0.42598644488848475, "learning_rate": 4.843824404115036e-06, "loss": 0.3188, "step": 1999 }, { "epoch": 1.6773832820799552, "grad_norm": 0.42926224976390726, "learning_rate": 4.838945579749641e-06, "loss": 0.3219, "step": 2000 }, { "epoch": 1.6782219737209951, "grad_norm": 0.3950821799765096, "learning_rate": 4.834066908881382e-06, "loss": 0.3374, "step": 2001 }, { "epoch": 1.6790606653620352, "grad_norm": 0.35337004880981493, "learning_rate": 4.829188396160004e-06, "loss": 0.3098, "step": 2002 }, { "epoch": 1.6798993570030754, "grad_norm": 0.4323114742067778, "learning_rate": 4.824310046235104e-06, "loss": 0.3368, "step": 2003 }, { "epoch": 1.6807380486441152, "grad_norm": 0.4517574787192902, "learning_rate": 4.81943186375612e-06, "loss": 0.3481, "step": 2004 }, { "epoch": 1.6815767402851551, "grad_norm": 0.3417497325664234, "learning_rate": 4.814553853372334e-06, "loss": 0.3231, "step": 2005 }, { "epoch": 1.682415431926195, "grad_norm": 0.33535156463435767, "learning_rate": 4.809676019732857e-06, "loss": 0.3161, "step": 2006 }, { "epoch": 1.6832541235672351, "grad_norm": 0.4098001918445995, "learning_rate": 4.804798367486639e-06, "loss": 0.3213, "step": 2007 }, { "epoch": 1.6840928152082753, "grad_norm": 0.39693272694481835, "learning_rate": 4.799920901282455e-06, "loss": 0.3431, "step": 2008 }, { "epoch": 1.6849315068493151, "grad_norm": 0.35059152379827113, "learning_rate": 4.795043625768899e-06, "loss": 0.2941, "step": 2009 }, { "epoch": 1.685770198490355, "grad_norm": 0.36513900016950124, "learning_rate": 4.790166545594392e-06, "loss": 0.3657, "step": 2010 }, { "epoch": 1.686608890131395, "grad_norm": 0.3656740548396439, "learning_rate": 4.785289665407157e-06, "loss": 0.3035, "step": 2011 }, { "epoch": 1.687447581772435, "grad_norm": 0.3589328502870178, "learning_rate": 4.780412989855234e-06, "loss": 0.3469, "step": 2012 }, { "epoch": 1.688286273413475, "grad_norm": 0.4009954577297503, "learning_rate": 4.775536523586469e-06, "loss": 0.3297, "step": 2013 }, { "epoch": 1.689124965054515, "grad_norm": 0.3486521645939382, "learning_rate": 4.7706602712485045e-06, "loss": 0.3032, "step": 2014 }, { "epoch": 1.689963656695555, "grad_norm": 0.40373692367728875, "learning_rate": 4.76578423748878e-06, "loss": 0.3718, "step": 2015 }, { "epoch": 1.6908023483365948, "grad_norm": 0.37388736120041954, "learning_rate": 4.760908426954529e-06, "loss": 0.3228, "step": 2016 }, { "epoch": 1.691641039977635, "grad_norm": 0.3345367086657766, "learning_rate": 4.7560328442927685e-06, "loss": 0.3192, "step": 2017 }, { "epoch": 1.6924797316186748, "grad_norm": 0.3514712665658989, "learning_rate": 4.751157494150303e-06, "loss": 0.312, "step": 2018 }, { "epoch": 1.693318423259715, "grad_norm": 0.3616369470629371, "learning_rate": 4.746282381173709e-06, "loss": 0.3098, "step": 2019 }, { "epoch": 1.6941571149007548, "grad_norm": 0.3553780317956822, "learning_rate": 4.741407510009343e-06, "loss": 0.288, "step": 2020 }, { "epoch": 1.6949958065417947, "grad_norm": 0.41909802775, "learning_rate": 4.73653288530333e-06, "loss": 0.3689, "step": 2021 }, { "epoch": 1.6958344981828348, "grad_norm": 0.3891291039814148, "learning_rate": 4.731658511701556e-06, "loss": 0.3025, "step": 2022 }, { "epoch": 1.6966731898238747, "grad_norm": 0.3805042185537954, "learning_rate": 4.726784393849673e-06, "loss": 0.3225, "step": 2023 }, { "epoch": 1.6975118814649148, "grad_norm": 0.3456594584470941, "learning_rate": 4.721910536393085e-06, "loss": 0.3406, "step": 2024 }, { "epoch": 1.6983505731059547, "grad_norm": 0.36497122232366475, "learning_rate": 4.71703694397695e-06, "loss": 0.3534, "step": 2025 }, { "epoch": 1.6991892647469946, "grad_norm": 0.3561224463302036, "learning_rate": 4.7121636212461745e-06, "loss": 0.3149, "step": 2026 }, { "epoch": 1.7000279563880345, "grad_norm": 0.40346377928919613, "learning_rate": 4.707290572845404e-06, "loss": 0.3442, "step": 2027 }, { "epoch": 1.7008666480290746, "grad_norm": 0.38378056244463987, "learning_rate": 4.702417803419027e-06, "loss": 0.338, "step": 2028 }, { "epoch": 1.7017053396701147, "grad_norm": 0.3877441123320978, "learning_rate": 4.697545317611165e-06, "loss": 0.3275, "step": 2029 }, { "epoch": 1.7025440313111546, "grad_norm": 0.3460757962359777, "learning_rate": 4.692673120065665e-06, "loss": 0.3008, "step": 2030 }, { "epoch": 1.7033827229521945, "grad_norm": 0.3921806185567242, "learning_rate": 4.687801215426105e-06, "loss": 0.3125, "step": 2031 }, { "epoch": 1.7042214145932344, "grad_norm": 0.40424419950429824, "learning_rate": 4.68292960833578e-06, "loss": 0.3532, "step": 2032 }, { "epoch": 1.7050601062342745, "grad_norm": 0.3949681934078731, "learning_rate": 4.6780583034377045e-06, "loss": 0.3282, "step": 2033 }, { "epoch": 1.7058987978753146, "grad_norm": 0.38567052024686466, "learning_rate": 4.673187305374605e-06, "loss": 0.3375, "step": 2034 }, { "epoch": 1.7067374895163545, "grad_norm": 0.36981020821551275, "learning_rate": 4.6683166187889085e-06, "loss": 0.3478, "step": 2035 }, { "epoch": 1.7075761811573944, "grad_norm": 0.3601644066100207, "learning_rate": 4.663446248322756e-06, "loss": 0.3274, "step": 2036 }, { "epoch": 1.7084148727984343, "grad_norm": 0.33609122160392096, "learning_rate": 4.658576198617977e-06, "loss": 0.2531, "step": 2037 }, { "epoch": 1.7092535644394744, "grad_norm": 0.4467280619688773, "learning_rate": 4.653706474316102e-06, "loss": 0.4073, "step": 2038 }, { "epoch": 1.7100922560805145, "grad_norm": 0.3604014448514675, "learning_rate": 4.648837080058351e-06, "loss": 0.3341, "step": 2039 }, { "epoch": 1.7109309477215544, "grad_norm": 0.33122317968075327, "learning_rate": 4.6439680204856255e-06, "loss": 0.2744, "step": 2040 }, { "epoch": 1.7117696393625943, "grad_norm": 0.37052399484236226, "learning_rate": 4.6390993002385115e-06, "loss": 0.314, "step": 2041 }, { "epoch": 1.7126083310036342, "grad_norm": 0.3439473867501183, "learning_rate": 4.634230923957273e-06, "loss": 0.3233, "step": 2042 }, { "epoch": 1.7134470226446743, "grad_norm": 0.3766689161765369, "learning_rate": 4.629362896281838e-06, "loss": 0.3375, "step": 2043 }, { "epoch": 1.7142857142857144, "grad_norm": 0.36159817301672115, "learning_rate": 4.624495221851812e-06, "loss": 0.354, "step": 2044 }, { "epoch": 1.7151244059267543, "grad_norm": 0.3682656710339429, "learning_rate": 4.619627905306457e-06, "loss": 0.3019, "step": 2045 }, { "epoch": 1.7159630975677942, "grad_norm": 0.35670650968452955, "learning_rate": 4.614760951284699e-06, "loss": 0.3434, "step": 2046 }, { "epoch": 1.716801789208834, "grad_norm": 0.38818248655076754, "learning_rate": 4.609894364425119e-06, "loss": 0.3461, "step": 2047 }, { "epoch": 1.7176404808498742, "grad_norm": 0.37771159596925485, "learning_rate": 4.605028149365938e-06, "loss": 0.3536, "step": 2048 }, { "epoch": 1.7184791724909143, "grad_norm": 0.3821180621575974, "learning_rate": 4.6001623107450335e-06, "loss": 0.3347, "step": 2049 }, { "epoch": 1.7193178641319542, "grad_norm": 0.3677879924153309, "learning_rate": 4.595296853199922e-06, "loss": 0.3203, "step": 2050 }, { "epoch": 1.720156555772994, "grad_norm": 0.3929705381923504, "learning_rate": 4.590431781367752e-06, "loss": 0.3621, "step": 2051 }, { "epoch": 1.720995247414034, "grad_norm": 0.37633180326937093, "learning_rate": 4.585567099885311e-06, "loss": 0.3427, "step": 2052 }, { "epoch": 1.721833939055074, "grad_norm": 0.36411218613329055, "learning_rate": 4.580702813389008e-06, "loss": 0.3256, "step": 2053 }, { "epoch": 1.7226726306961142, "grad_norm": 0.3469046881536596, "learning_rate": 4.575838926514882e-06, "loss": 0.3398, "step": 2054 }, { "epoch": 1.723511322337154, "grad_norm": 0.3862303069266079, "learning_rate": 4.5709754438985875e-06, "loss": 0.3408, "step": 2055 }, { "epoch": 1.724350013978194, "grad_norm": 0.343542919412061, "learning_rate": 4.566112370175391e-06, "loss": 0.2995, "step": 2056 }, { "epoch": 1.7251887056192339, "grad_norm": 0.36062250532581097, "learning_rate": 4.561249709980176e-06, "loss": 0.3429, "step": 2057 }, { "epoch": 1.726027397260274, "grad_norm": 0.37852173347838197, "learning_rate": 4.556387467947425e-06, "loss": 0.3698, "step": 2058 }, { "epoch": 1.726866088901314, "grad_norm": 0.33846736869894184, "learning_rate": 4.551525648711227e-06, "loss": 0.301, "step": 2059 }, { "epoch": 1.727704780542354, "grad_norm": 0.32379680083916745, "learning_rate": 4.546664256905268e-06, "loss": 0.3143, "step": 2060 }, { "epoch": 1.7285434721833939, "grad_norm": 0.38133135841804305, "learning_rate": 4.541803297162822e-06, "loss": 0.3379, "step": 2061 }, { "epoch": 1.7293821638244338, "grad_norm": 0.3746129345545903, "learning_rate": 4.536942774116754e-06, "loss": 0.3476, "step": 2062 }, { "epoch": 1.7302208554654739, "grad_norm": 0.35550269112871324, "learning_rate": 4.532082692399516e-06, "loss": 0.3448, "step": 2063 }, { "epoch": 1.731059547106514, "grad_norm": 0.37575319864430606, "learning_rate": 4.527223056643133e-06, "loss": 0.363, "step": 2064 }, { "epoch": 1.7318982387475539, "grad_norm": 0.36633991517144693, "learning_rate": 4.522363871479212e-06, "loss": 0.3204, "step": 2065 }, { "epoch": 1.7327369303885938, "grad_norm": 0.39634768910612467, "learning_rate": 4.5175051415389224e-06, "loss": 0.3288, "step": 2066 }, { "epoch": 1.7335756220296337, "grad_norm": 0.411701256107996, "learning_rate": 4.512646871453006e-06, "loss": 0.3358, "step": 2067 }, { "epoch": 1.7344143136706738, "grad_norm": 0.3585720753700032, "learning_rate": 4.507789065851767e-06, "loss": 0.3354, "step": 2068 }, { "epoch": 1.735253005311714, "grad_norm": 0.4037001092897285, "learning_rate": 4.50293172936506e-06, "loss": 0.3038, "step": 2069 }, { "epoch": 1.7360916969527538, "grad_norm": 0.41707038738662006, "learning_rate": 4.498074866622299e-06, "loss": 0.3169, "step": 2070 }, { "epoch": 1.7369303885937937, "grad_norm": 0.40234570432630695, "learning_rate": 4.493218482252445e-06, "loss": 0.3433, "step": 2071 }, { "epoch": 1.7377690802348336, "grad_norm": 0.381239081362176, "learning_rate": 4.488362580884e-06, "loss": 0.3461, "step": 2072 }, { "epoch": 1.7386077718758737, "grad_norm": 0.34295425863386797, "learning_rate": 4.4835071671450135e-06, "loss": 0.3199, "step": 2073 }, { "epoch": 1.7394464635169136, "grad_norm": 0.4459498907165205, "learning_rate": 4.478652245663058e-06, "loss": 0.3167, "step": 2074 }, { "epoch": 1.7402851551579537, "grad_norm": 0.3900931759232056, "learning_rate": 4.473797821065247e-06, "loss": 0.3314, "step": 2075 }, { "epoch": 1.7411238467989936, "grad_norm": 0.39964056536533904, "learning_rate": 4.468943897978218e-06, "loss": 0.3449, "step": 2076 }, { "epoch": 1.7419625384400335, "grad_norm": 0.33649789659967005, "learning_rate": 4.464090481028128e-06, "loss": 0.2887, "step": 2077 }, { "epoch": 1.7428012300810736, "grad_norm": 0.385966656845809, "learning_rate": 4.459237574840655e-06, "loss": 0.361, "step": 2078 }, { "epoch": 1.7436399217221135, "grad_norm": 0.40603683632707216, "learning_rate": 4.454385184040986e-06, "loss": 0.3241, "step": 2079 }, { "epoch": 1.7444786133631536, "grad_norm": 0.37397334463496024, "learning_rate": 4.449533313253821e-06, "loss": 0.3071, "step": 2080 }, { "epoch": 1.7453173050041935, "grad_norm": 0.3614754495349746, "learning_rate": 4.444681967103363e-06, "loss": 0.3525, "step": 2081 }, { "epoch": 1.7461559966452334, "grad_norm": 0.39114608900837466, "learning_rate": 4.439831150213312e-06, "loss": 0.3282, "step": 2082 }, { "epoch": 1.7469946882862735, "grad_norm": 0.38793659617564635, "learning_rate": 4.434980867206871e-06, "loss": 0.3371, "step": 2083 }, { "epoch": 1.7478333799273134, "grad_norm": 0.37986815435977644, "learning_rate": 4.430131122706724e-06, "loss": 0.3205, "step": 2084 }, { "epoch": 1.7486720715683535, "grad_norm": 0.4094203134128537, "learning_rate": 4.4252819213350496e-06, "loss": 0.329, "step": 2085 }, { "epoch": 1.7495107632093934, "grad_norm": 0.36206258927957036, "learning_rate": 4.420433267713508e-06, "loss": 0.3067, "step": 2086 }, { "epoch": 1.7503494548504333, "grad_norm": 0.38373463459802576, "learning_rate": 4.4155851664632325e-06, "loss": 0.3608, "step": 2087 }, { "epoch": 1.7511881464914731, "grad_norm": 0.3841747664150411, "learning_rate": 4.410737622204834e-06, "loss": 0.3015, "step": 2088 }, { "epoch": 1.7520268381325133, "grad_norm": 0.3582820306382889, "learning_rate": 4.405890639558393e-06, "loss": 0.3371, "step": 2089 }, { "epoch": 1.7528655297735534, "grad_norm": 0.36981305505322887, "learning_rate": 4.401044223143452e-06, "loss": 0.3346, "step": 2090 }, { "epoch": 1.7537042214145933, "grad_norm": 0.3759043218071809, "learning_rate": 4.396198377579017e-06, "loss": 0.3412, "step": 2091 }, { "epoch": 1.7545429130556331, "grad_norm": 0.37050327656564447, "learning_rate": 4.391353107483546e-06, "loss": 0.3169, "step": 2092 }, { "epoch": 1.755381604696673, "grad_norm": 0.3422941757739256, "learning_rate": 4.386508417474951e-06, "loss": 0.3123, "step": 2093 }, { "epoch": 1.7562202963377131, "grad_norm": 0.3727786055085344, "learning_rate": 4.381664312170592e-06, "loss": 0.3615, "step": 2094 }, { "epoch": 1.7570589879787533, "grad_norm": 0.38252331848791743, "learning_rate": 4.376820796187269e-06, "loss": 0.3437, "step": 2095 }, { "epoch": 1.7578976796197932, "grad_norm": 0.35067001429794736, "learning_rate": 4.371977874141225e-06, "loss": 0.321, "step": 2096 }, { "epoch": 1.758736371260833, "grad_norm": 0.36447333695044976, "learning_rate": 4.367135550648129e-06, "loss": 0.3183, "step": 2097 }, { "epoch": 1.759575062901873, "grad_norm": 0.3414555427934395, "learning_rate": 4.362293830323088e-06, "loss": 0.3413, "step": 2098 }, { "epoch": 1.760413754542913, "grad_norm": 0.351320653857181, "learning_rate": 4.35745271778063e-06, "loss": 0.3302, "step": 2099 }, { "epoch": 1.7612524461839532, "grad_norm": 0.33734767082552347, "learning_rate": 4.352612217634702e-06, "loss": 0.316, "step": 2100 }, { "epoch": 1.762091137824993, "grad_norm": 0.34381972818821954, "learning_rate": 4.347772334498672e-06, "loss": 0.334, "step": 2101 }, { "epoch": 1.762929829466033, "grad_norm": 0.3633981009554446, "learning_rate": 4.342933072985317e-06, "loss": 0.3464, "step": 2102 }, { "epoch": 1.7637685211070728, "grad_norm": 0.3538105739611332, "learning_rate": 4.338094437706821e-06, "loss": 0.3148, "step": 2103 }, { "epoch": 1.764607212748113, "grad_norm": 0.3603754841595614, "learning_rate": 4.333256433274775e-06, "loss": 0.3406, "step": 2104 }, { "epoch": 1.765445904389153, "grad_norm": 0.3353535002976378, "learning_rate": 4.328419064300162e-06, "loss": 0.3016, "step": 2105 }, { "epoch": 1.766284596030193, "grad_norm": 0.36741732503963315, "learning_rate": 4.323582335393365e-06, "loss": 0.3449, "step": 2106 }, { "epoch": 1.7671232876712328, "grad_norm": 0.3609753748091938, "learning_rate": 4.318746251164157e-06, "loss": 0.3205, "step": 2107 }, { "epoch": 1.7679619793122727, "grad_norm": 0.3679193519314195, "learning_rate": 4.313910816221691e-06, "loss": 0.3327, "step": 2108 }, { "epoch": 1.7688006709533128, "grad_norm": 0.3794822998358373, "learning_rate": 4.30907603517451e-06, "loss": 0.3668, "step": 2109 }, { "epoch": 1.769639362594353, "grad_norm": 0.3651751051055811, "learning_rate": 4.304241912630524e-06, "loss": 0.3288, "step": 2110 }, { "epoch": 1.7704780542353928, "grad_norm": 0.3724023255417829, "learning_rate": 4.299408453197024e-06, "loss": 0.3219, "step": 2111 }, { "epoch": 1.7713167458764327, "grad_norm": 0.39357730243929506, "learning_rate": 4.2945756614806624e-06, "loss": 0.3322, "step": 2112 }, { "epoch": 1.7721554375174726, "grad_norm": 0.35654545708583235, "learning_rate": 4.289743542087459e-06, "loss": 0.3244, "step": 2113 }, { "epoch": 1.7729941291585127, "grad_norm": 0.36001231134082495, "learning_rate": 4.284912099622793e-06, "loss": 0.3202, "step": 2114 }, { "epoch": 1.7738328207995528, "grad_norm": 0.3865847842242539, "learning_rate": 4.280081338691397e-06, "loss": 0.3352, "step": 2115 }, { "epoch": 1.7746715124405927, "grad_norm": 0.32909341074799536, "learning_rate": 4.275251263897353e-06, "loss": 0.3024, "step": 2116 }, { "epoch": 1.7755102040816326, "grad_norm": 0.38592603768621103, "learning_rate": 4.270421879844093e-06, "loss": 0.3717, "step": 2117 }, { "epoch": 1.7763488957226725, "grad_norm": 0.3742539583570403, "learning_rate": 4.265593191134384e-06, "loss": 0.3279, "step": 2118 }, { "epoch": 1.7771875873637126, "grad_norm": 0.34273794284569215, "learning_rate": 4.260765202370336e-06, "loss": 0.3041, "step": 2119 }, { "epoch": 1.7780262790047527, "grad_norm": 0.3728061901849653, "learning_rate": 4.2559379181533925e-06, "loss": 0.3269, "step": 2120 }, { "epoch": 1.7788649706457926, "grad_norm": 0.36959057531550443, "learning_rate": 4.2511113430843204e-06, "loss": 0.3391, "step": 2121 }, { "epoch": 1.7797036622868325, "grad_norm": 0.38230825775181554, "learning_rate": 4.246285481763217e-06, "loss": 0.3328, "step": 2122 }, { "epoch": 1.7805423539278724, "grad_norm": 0.34196811202969724, "learning_rate": 4.241460338789491e-06, "loss": 0.3316, "step": 2123 }, { "epoch": 1.7813810455689125, "grad_norm": 0.3341421990158707, "learning_rate": 4.236635918761876e-06, "loss": 0.2995, "step": 2124 }, { "epoch": 1.7822197372099526, "grad_norm": 0.35992547387722706, "learning_rate": 4.231812226278409e-06, "loss": 0.3388, "step": 2125 }, { "epoch": 1.7830584288509925, "grad_norm": 0.3839404279991923, "learning_rate": 4.226989265936437e-06, "loss": 0.3313, "step": 2126 }, { "epoch": 1.7838971204920324, "grad_norm": 0.34080077328871305, "learning_rate": 4.222167042332611e-06, "loss": 0.3305, "step": 2127 }, { "epoch": 1.7847358121330723, "grad_norm": 0.34682937953525855, "learning_rate": 4.217345560062877e-06, "loss": 0.3147, "step": 2128 }, { "epoch": 1.7855745037741124, "grad_norm": 0.36852763899696167, "learning_rate": 4.212524823722472e-06, "loss": 0.3369, "step": 2129 }, { "epoch": 1.7864131954151523, "grad_norm": 0.3536475724600682, "learning_rate": 4.20770483790593e-06, "loss": 0.3334, "step": 2130 }, { "epoch": 1.7872518870561924, "grad_norm": 0.36647598083582095, "learning_rate": 4.202885607207059e-06, "loss": 0.321, "step": 2131 }, { "epoch": 1.7880905786972323, "grad_norm": 0.37060697961458117, "learning_rate": 4.198067136218957e-06, "loss": 0.3413, "step": 2132 }, { "epoch": 1.7889292703382722, "grad_norm": 0.37854542739607433, "learning_rate": 4.193249429533996e-06, "loss": 0.355, "step": 2133 }, { "epoch": 1.7897679619793123, "grad_norm": 0.3592100565833062, "learning_rate": 4.188432491743813e-06, "loss": 0.3205, "step": 2134 }, { "epoch": 1.7906066536203522, "grad_norm": 0.39124294519484243, "learning_rate": 4.18361632743932e-06, "loss": 0.318, "step": 2135 }, { "epoch": 1.7914453452613923, "grad_norm": 0.39447166433219916, "learning_rate": 4.178800941210686e-06, "loss": 0.302, "step": 2136 }, { "epoch": 1.7922840369024322, "grad_norm": 0.3740060036255949, "learning_rate": 4.173986337647343e-06, "loss": 0.3272, "step": 2137 }, { "epoch": 1.793122728543472, "grad_norm": 0.3821218529615693, "learning_rate": 4.169172521337977e-06, "loss": 0.3867, "step": 2138 }, { "epoch": 1.7939614201845122, "grad_norm": 0.33674369635651774, "learning_rate": 4.164359496870516e-06, "loss": 0.2898, "step": 2139 }, { "epoch": 1.794800111825552, "grad_norm": 0.3943375112016677, "learning_rate": 4.159547268832144e-06, "loss": 0.3232, "step": 2140 }, { "epoch": 1.7956388034665922, "grad_norm": 0.3751819994527581, "learning_rate": 4.154735841809282e-06, "loss": 0.3373, "step": 2141 }, { "epoch": 1.796477495107632, "grad_norm": 0.3823543872441955, "learning_rate": 4.149925220387581e-06, "loss": 0.3372, "step": 2142 }, { "epoch": 1.797316186748672, "grad_norm": 0.3951333484706745, "learning_rate": 4.145115409151936e-06, "loss": 0.3558, "step": 2143 }, { "epoch": 1.7981548783897119, "grad_norm": 0.38045900864566, "learning_rate": 4.14030641268646e-06, "loss": 0.3083, "step": 2144 }, { "epoch": 1.798993570030752, "grad_norm": 0.3702279866076021, "learning_rate": 4.135498235574493e-06, "loss": 0.3218, "step": 2145 }, { "epoch": 1.799832261671792, "grad_norm": 0.3578485995973546, "learning_rate": 4.130690882398599e-06, "loss": 0.3, "step": 2146 }, { "epoch": 1.800670953312832, "grad_norm": 0.4060839781416815, "learning_rate": 4.125884357740545e-06, "loss": 0.3344, "step": 2147 }, { "epoch": 1.801509644953872, "grad_norm": 0.41215826590336396, "learning_rate": 4.1210786661813215e-06, "loss": 0.3753, "step": 2148 }, { "epoch": 1.8023483365949118, "grad_norm": 0.33788271144486537, "learning_rate": 4.1162738123011145e-06, "loss": 0.3, "step": 2149 }, { "epoch": 1.803187028235952, "grad_norm": 0.37639696575832543, "learning_rate": 4.111469800679316e-06, "loss": 0.3331, "step": 2150 }, { "epoch": 1.804025719876992, "grad_norm": 0.3350111184800719, "learning_rate": 4.106666635894519e-06, "loss": 0.3189, "step": 2151 }, { "epoch": 1.804864411518032, "grad_norm": 0.3477914903372478, "learning_rate": 4.101864322524501e-06, "loss": 0.3097, "step": 2152 }, { "epoch": 1.8057031031590718, "grad_norm": 0.3692470469277046, "learning_rate": 4.097062865146236e-06, "loss": 0.3422, "step": 2153 }, { "epoch": 1.8065417948001117, "grad_norm": 0.40194066385324356, "learning_rate": 4.09226226833588e-06, "loss": 0.3344, "step": 2154 }, { "epoch": 1.8073804864411518, "grad_norm": 0.3690607855312741, "learning_rate": 4.087462536668763e-06, "loss": 0.3317, "step": 2155 }, { "epoch": 1.808219178082192, "grad_norm": 0.40736879454076447, "learning_rate": 4.082663674719398e-06, "loss": 0.3478, "step": 2156 }, { "epoch": 1.8090578697232318, "grad_norm": 0.3555456205018071, "learning_rate": 4.077865687061465e-06, "loss": 0.3177, "step": 2157 }, { "epoch": 1.8098965613642717, "grad_norm": 0.3732980334156119, "learning_rate": 4.0730685782678125e-06, "loss": 0.3641, "step": 2158 }, { "epoch": 1.8107352530053116, "grad_norm": 0.35533983345057313, "learning_rate": 4.068272352910453e-06, "loss": 0.2977, "step": 2159 }, { "epoch": 1.8115739446463517, "grad_norm": 0.3486204420938532, "learning_rate": 4.0634770155605515e-06, "loss": 0.345, "step": 2160 }, { "epoch": 1.8124126362873918, "grad_norm": 0.35530307947752343, "learning_rate": 4.058682570788432e-06, "loss": 0.3387, "step": 2161 }, { "epoch": 1.8132513279284317, "grad_norm": 0.3515454100551724, "learning_rate": 4.053889023163563e-06, "loss": 0.3225, "step": 2162 }, { "epoch": 1.8140900195694716, "grad_norm": 0.36898826493661724, "learning_rate": 4.049096377254563e-06, "loss": 0.3392, "step": 2163 }, { "epoch": 1.8149287112105115, "grad_norm": 0.35671153125809396, "learning_rate": 4.044304637629191e-06, "loss": 0.3413, "step": 2164 }, { "epoch": 1.8157674028515516, "grad_norm": 0.3823422435753761, "learning_rate": 4.0395138088543345e-06, "loss": 0.3627, "step": 2165 }, { "epoch": 1.8166060944925917, "grad_norm": 0.36416361360224264, "learning_rate": 4.034723895496022e-06, "loss": 0.3432, "step": 2166 }, { "epoch": 1.8174447861336316, "grad_norm": 0.3705955286749317, "learning_rate": 4.029934902119408e-06, "loss": 0.3428, "step": 2167 }, { "epoch": 1.8182834777746715, "grad_norm": 0.3470492765945628, "learning_rate": 4.0251468332887634e-06, "loss": 0.3159, "step": 2168 }, { "epoch": 1.8191221694157114, "grad_norm": 0.3530677780788243, "learning_rate": 4.020359693567486e-06, "loss": 0.3512, "step": 2169 }, { "epoch": 1.8199608610567515, "grad_norm": 0.38274281224749174, "learning_rate": 4.015573487518082e-06, "loss": 0.372, "step": 2170 }, { "epoch": 1.8207995526977916, "grad_norm": 0.3627249941145728, "learning_rate": 4.010788219702174e-06, "loss": 0.3201, "step": 2171 }, { "epoch": 1.8216382443388315, "grad_norm": 0.33183518798561756, "learning_rate": 4.006003894680486e-06, "loss": 0.3142, "step": 2172 }, { "epoch": 1.8224769359798714, "grad_norm": 0.3796024758365083, "learning_rate": 4.001220517012842e-06, "loss": 0.3275, "step": 2173 }, { "epoch": 1.8233156276209113, "grad_norm": 0.35004498394153466, "learning_rate": 3.9964380912581695e-06, "loss": 0.3481, "step": 2174 }, { "epoch": 1.8241543192619514, "grad_norm": 0.39333783358401603, "learning_rate": 3.9916566219744796e-06, "loss": 0.3664, "step": 2175 }, { "epoch": 1.8249930109029915, "grad_norm": 0.3649046997960572, "learning_rate": 3.98687611371888e-06, "loss": 0.3131, "step": 2176 }, { "epoch": 1.8258317025440314, "grad_norm": 0.32019188730985404, "learning_rate": 3.982096571047562e-06, "loss": 0.274, "step": 2177 }, { "epoch": 1.8266703941850713, "grad_norm": 0.36459266285100544, "learning_rate": 3.977317998515788e-06, "loss": 0.3603, "step": 2178 }, { "epoch": 1.8275090858261112, "grad_norm": 0.3521018526163575, "learning_rate": 3.972540400677906e-06, "loss": 0.3427, "step": 2179 }, { "epoch": 1.8283477774671513, "grad_norm": 0.34751897802331294, "learning_rate": 3.967763782087332e-06, "loss": 0.3143, "step": 2180 }, { "epoch": 1.8291864691081914, "grad_norm": 0.323881773793323, "learning_rate": 3.962988147296543e-06, "loss": 0.2989, "step": 2181 }, { "epoch": 1.8300251607492313, "grad_norm": 0.33605107667318906, "learning_rate": 3.958213500857086e-06, "loss": 0.347, "step": 2182 }, { "epoch": 1.8308638523902712, "grad_norm": 0.3845877366870081, "learning_rate": 3.953439847319562e-06, "loss": 0.3179, "step": 2183 }, { "epoch": 1.831702544031311, "grad_norm": 0.34235910294520466, "learning_rate": 3.948667191233627e-06, "loss": 0.3252, "step": 2184 }, { "epoch": 1.8325412356723512, "grad_norm": 0.3477084377283107, "learning_rate": 3.943895537147988e-06, "loss": 0.3397, "step": 2185 }, { "epoch": 1.833379927313391, "grad_norm": 0.3595109259740695, "learning_rate": 3.939124889610391e-06, "loss": 0.3231, "step": 2186 }, { "epoch": 1.8342186189544312, "grad_norm": 0.345773374837391, "learning_rate": 3.934355253167628e-06, "loss": 0.3116, "step": 2187 }, { "epoch": 1.835057310595471, "grad_norm": 0.35928415689572646, "learning_rate": 3.929586632365529e-06, "loss": 0.324, "step": 2188 }, { "epoch": 1.835896002236511, "grad_norm": 0.36856952295145784, "learning_rate": 3.9248190317489484e-06, "loss": 0.3516, "step": 2189 }, { "epoch": 1.836734693877551, "grad_norm": 0.3730046008882706, "learning_rate": 3.920052455861778e-06, "loss": 0.3137, "step": 2190 }, { "epoch": 1.837573385518591, "grad_norm": 0.3413839026957385, "learning_rate": 3.915286909246922e-06, "loss": 0.3539, "step": 2191 }, { "epoch": 1.838412077159631, "grad_norm": 0.34198829117382945, "learning_rate": 3.910522396446315e-06, "loss": 0.3295, "step": 2192 }, { "epoch": 1.839250768800671, "grad_norm": 0.4024287445817092, "learning_rate": 3.9057589220008985e-06, "loss": 0.3536, "step": 2193 }, { "epoch": 1.8400894604417108, "grad_norm": 0.3286583373823832, "learning_rate": 3.900996490450626e-06, "loss": 0.3009, "step": 2194 }, { "epoch": 1.840928152082751, "grad_norm": 0.37922069739245196, "learning_rate": 3.8962351063344604e-06, "loss": 0.36, "step": 2195 }, { "epoch": 1.8417668437237908, "grad_norm": 0.3447897561072899, "learning_rate": 3.89147477419036e-06, "loss": 0.316, "step": 2196 }, { "epoch": 1.842605535364831, "grad_norm": 0.38902654649720847, "learning_rate": 3.886715498555286e-06, "loss": 0.3378, "step": 2197 }, { "epoch": 1.8434442270058709, "grad_norm": 0.3720370600850884, "learning_rate": 3.881957283965192e-06, "loss": 0.3251, "step": 2198 }, { "epoch": 1.8442829186469107, "grad_norm": 0.3460478562503942, "learning_rate": 3.877200134955017e-06, "loss": 0.3108, "step": 2199 }, { "epoch": 1.8451216102879506, "grad_norm": 0.3581454591025851, "learning_rate": 3.872444056058686e-06, "loss": 0.3335, "step": 2200 }, { "epoch": 1.8459603019289907, "grad_norm": 0.35631615885611106, "learning_rate": 3.867689051809108e-06, "loss": 0.3289, "step": 2201 }, { "epoch": 1.8467989935700309, "grad_norm": 0.3777317168074193, "learning_rate": 3.862935126738162e-06, "loss": 0.3414, "step": 2202 }, { "epoch": 1.8476376852110707, "grad_norm": 0.36285157038699467, "learning_rate": 3.858182285376703e-06, "loss": 0.3491, "step": 2203 }, { "epoch": 1.8484763768521106, "grad_norm": 0.37427509143979826, "learning_rate": 3.853430532254547e-06, "loss": 0.3562, "step": 2204 }, { "epoch": 1.8493150684931505, "grad_norm": 0.3470524398706251, "learning_rate": 3.848679871900481e-06, "loss": 0.298, "step": 2205 }, { "epoch": 1.8501537601341906, "grad_norm": 0.36128606222020654, "learning_rate": 3.843930308842245e-06, "loss": 0.3414, "step": 2206 }, { "epoch": 1.8509924517752308, "grad_norm": 0.34571634440494137, "learning_rate": 3.839181847606533e-06, "loss": 0.3119, "step": 2207 }, { "epoch": 1.8518311434162706, "grad_norm": 0.40173362904103643, "learning_rate": 3.834434492718994e-06, "loss": 0.3387, "step": 2208 }, { "epoch": 1.8526698350573105, "grad_norm": 0.36598588840603086, "learning_rate": 3.829688248704215e-06, "loss": 0.3026, "step": 2209 }, { "epoch": 1.8535085266983504, "grad_norm": 0.3633921420464868, "learning_rate": 3.824943120085731e-06, "loss": 0.3534, "step": 2210 }, { "epoch": 1.8543472183393905, "grad_norm": 0.328225046057419, "learning_rate": 3.82019911138601e-06, "loss": 0.3145, "step": 2211 }, { "epoch": 1.8551859099804306, "grad_norm": 0.35296506718032833, "learning_rate": 3.815456227126454e-06, "loss": 0.3284, "step": 2212 }, { "epoch": 1.8560246016214705, "grad_norm": 0.42561857986943596, "learning_rate": 3.8107144718273935e-06, "loss": 0.3466, "step": 2213 }, { "epoch": 1.8568632932625104, "grad_norm": 0.36983571688170574, "learning_rate": 3.8059738500080844e-06, "loss": 0.3042, "step": 2214 }, { "epoch": 1.8577019849035503, "grad_norm": 0.39297653267407784, "learning_rate": 3.8012343661866967e-06, "loss": 0.3387, "step": 2215 }, { "epoch": 1.8585406765445904, "grad_norm": 0.3681691162001658, "learning_rate": 3.7964960248803247e-06, "loss": 0.3102, "step": 2216 }, { "epoch": 1.8593793681856305, "grad_norm": 0.3671566230127683, "learning_rate": 3.791758830604963e-06, "loss": 0.3453, "step": 2217 }, { "epoch": 1.8602180598266704, "grad_norm": 0.38607687113607025, "learning_rate": 3.7870227878755217e-06, "loss": 0.3183, "step": 2218 }, { "epoch": 1.8610567514677103, "grad_norm": 0.43617223172216374, "learning_rate": 3.7822879012058123e-06, "loss": 0.3399, "step": 2219 }, { "epoch": 1.8618954431087502, "grad_norm": 0.3636857454173304, "learning_rate": 3.7775541751085387e-06, "loss": 0.35, "step": 2220 }, { "epoch": 1.8627341347497903, "grad_norm": 0.378752339178311, "learning_rate": 3.7728216140953077e-06, "loss": 0.351, "step": 2221 }, { "epoch": 1.8635728263908304, "grad_norm": 0.35820166950358373, "learning_rate": 3.768090222676605e-06, "loss": 0.3192, "step": 2222 }, { "epoch": 1.8644115180318703, "grad_norm": 0.38438500416912996, "learning_rate": 3.763360005361811e-06, "loss": 0.3338, "step": 2223 }, { "epoch": 1.8652502096729102, "grad_norm": 0.41638168262068975, "learning_rate": 3.7586309666591824e-06, "loss": 0.3379, "step": 2224 }, { "epoch": 1.86608890131395, "grad_norm": 0.37895057468604154, "learning_rate": 3.7539031110758528e-06, "loss": 0.301, "step": 2225 }, { "epoch": 1.8669275929549902, "grad_norm": 0.3889675135958173, "learning_rate": 3.749176443117829e-06, "loss": 0.3501, "step": 2226 }, { "epoch": 1.8677662845960303, "grad_norm": 0.3614046285568713, "learning_rate": 3.7444509672899894e-06, "loss": 0.2976, "step": 2227 }, { "epoch": 1.8686049762370702, "grad_norm": 0.4618309906180962, "learning_rate": 3.739726688096067e-06, "loss": 0.3401, "step": 2228 }, { "epoch": 1.8694436678781101, "grad_norm": 0.49873850006104964, "learning_rate": 3.7350036100386637e-06, "loss": 0.347, "step": 2229 }, { "epoch": 1.87028235951915, "grad_norm": 0.3695788565992298, "learning_rate": 3.73028173761923e-06, "loss": 0.3065, "step": 2230 }, { "epoch": 1.8711210511601901, "grad_norm": 0.4001711765210801, "learning_rate": 3.7255610753380723e-06, "loss": 0.3239, "step": 2231 }, { "epoch": 1.8719597428012302, "grad_norm": 0.4071798113055839, "learning_rate": 3.7208416276943416e-06, "loss": 0.3042, "step": 2232 }, { "epoch": 1.8727984344422701, "grad_norm": 0.42926398439093094, "learning_rate": 3.71612339918603e-06, "loss": 0.3467, "step": 2233 }, { "epoch": 1.87363712608331, "grad_norm": 0.3665196057581528, "learning_rate": 3.711406394309971e-06, "loss": 0.2971, "step": 2234 }, { "epoch": 1.87447581772435, "grad_norm": 0.3875027800667779, "learning_rate": 3.7066906175618265e-06, "loss": 0.3342, "step": 2235 }, { "epoch": 1.87531450936539, "grad_norm": 0.41444347265604614, "learning_rate": 3.701976073436093e-06, "loss": 0.3495, "step": 2236 }, { "epoch": 1.8761532010064301, "grad_norm": 0.35558860701677275, "learning_rate": 3.6972627664260914e-06, "loss": 0.3238, "step": 2237 }, { "epoch": 1.87699189264747, "grad_norm": 0.34792454219580293, "learning_rate": 3.6925507010239602e-06, "loss": 0.335, "step": 2238 }, { "epoch": 1.87783058428851, "grad_norm": 0.3549750754632262, "learning_rate": 3.687839881720657e-06, "loss": 0.3436, "step": 2239 }, { "epoch": 1.8786692759295498, "grad_norm": 0.3537999079872674, "learning_rate": 3.6831303130059564e-06, "loss": 0.3489, "step": 2240 }, { "epoch": 1.87950796757059, "grad_norm": 0.3393003457847989, "learning_rate": 3.67842199936843e-06, "loss": 0.3109, "step": 2241 }, { "epoch": 1.88034665921163, "grad_norm": 0.3935290461575288, "learning_rate": 3.673714945295463e-06, "loss": 0.3746, "step": 2242 }, { "epoch": 1.88118535085267, "grad_norm": 0.40206773068951385, "learning_rate": 3.6690091552732344e-06, "loss": 0.3325, "step": 2243 }, { "epoch": 1.8820240424937098, "grad_norm": 0.36576352397802775, "learning_rate": 3.664304633786723e-06, "loss": 0.3159, "step": 2244 }, { "epoch": 1.8828627341347497, "grad_norm": 0.3555744699901997, "learning_rate": 3.6596013853196964e-06, "loss": 0.3431, "step": 2245 }, { "epoch": 1.8837014257757898, "grad_norm": 0.36897903368971546, "learning_rate": 3.6548994143547066e-06, "loss": 0.3286, "step": 2246 }, { "epoch": 1.8845401174168297, "grad_norm": 0.3749699865290116, "learning_rate": 3.650198725373094e-06, "loss": 0.3132, "step": 2247 }, { "epoch": 1.8853788090578698, "grad_norm": 0.3693352348859699, "learning_rate": 3.645499322854969e-06, "loss": 0.3557, "step": 2248 }, { "epoch": 1.8862175006989097, "grad_norm": 0.34175957667891443, "learning_rate": 3.6408012112792225e-06, "loss": 0.3438, "step": 2249 }, { "epoch": 1.8870561923399496, "grad_norm": 0.34066036003659705, "learning_rate": 3.6361043951235132e-06, "loss": 0.3053, "step": 2250 }, { "epoch": 1.8878948839809897, "grad_norm": 0.3498003060900975, "learning_rate": 3.6314088788642636e-06, "loss": 0.3288, "step": 2251 }, { "epoch": 1.8887335756220296, "grad_norm": 0.374834401605339, "learning_rate": 3.6267146669766595e-06, "loss": 0.3378, "step": 2252 }, { "epoch": 1.8895722672630697, "grad_norm": 0.3790047895313454, "learning_rate": 3.622021763934645e-06, "loss": 0.352, "step": 2253 }, { "epoch": 1.8904109589041096, "grad_norm": 0.35401763363464556, "learning_rate": 3.6173301742109096e-06, "loss": 0.2995, "step": 2254 }, { "epoch": 1.8912496505451495, "grad_norm": 0.3378969907400238, "learning_rate": 3.6126399022768998e-06, "loss": 0.3488, "step": 2255 }, { "epoch": 1.8920883421861896, "grad_norm": 0.3345362635141331, "learning_rate": 3.607950952602799e-06, "loss": 0.3265, "step": 2256 }, { "epoch": 1.8929270338272295, "grad_norm": 0.35003863297585214, "learning_rate": 3.6032633296575353e-06, "loss": 0.3371, "step": 2257 }, { "epoch": 1.8937657254682696, "grad_norm": 0.34897744525116275, "learning_rate": 3.5985770379087725e-06, "loss": 0.32, "step": 2258 }, { "epoch": 1.8946044171093095, "grad_norm": 0.3506011775458283, "learning_rate": 3.5938920818229005e-06, "loss": 0.3291, "step": 2259 }, { "epoch": 1.8954431087503494, "grad_norm": 0.3580622695010703, "learning_rate": 3.5892084658650413e-06, "loss": 0.3489, "step": 2260 }, { "epoch": 1.8962818003913893, "grad_norm": 0.3391641329415094, "learning_rate": 3.5845261944990363e-06, "loss": 0.3114, "step": 2261 }, { "epoch": 1.8971204920324294, "grad_norm": 0.37530831584836055, "learning_rate": 3.5798452721874487e-06, "loss": 0.3109, "step": 2262 }, { "epoch": 1.8979591836734695, "grad_norm": 0.34428325181837294, "learning_rate": 3.5751657033915553e-06, "loss": 0.3551, "step": 2263 }, { "epoch": 1.8987978753145094, "grad_norm": 0.3291411693941037, "learning_rate": 3.5704874925713384e-06, "loss": 0.3068, "step": 2264 }, { "epoch": 1.8996365669555493, "grad_norm": 0.358618199717466, "learning_rate": 3.565810644185491e-06, "loss": 0.3482, "step": 2265 }, { "epoch": 1.9004752585965892, "grad_norm": 0.3449629284191607, "learning_rate": 3.561135162691409e-06, "loss": 0.334, "step": 2266 }, { "epoch": 1.9013139502376293, "grad_norm": 0.36316932758642845, "learning_rate": 3.556461052545177e-06, "loss": 0.3115, "step": 2267 }, { "epoch": 1.9021526418786694, "grad_norm": 0.363625620525954, "learning_rate": 3.551788318201582e-06, "loss": 0.3375, "step": 2268 }, { "epoch": 1.9029913335197093, "grad_norm": 0.3678375150577309, "learning_rate": 3.5471169641140945e-06, "loss": 0.3154, "step": 2269 }, { "epoch": 1.9038300251607492, "grad_norm": 0.33950898723586875, "learning_rate": 3.5424469947348716e-06, "loss": 0.3143, "step": 2270 }, { "epoch": 1.904668716801789, "grad_norm": 0.3855655286830475, "learning_rate": 3.537778414514752e-06, "loss": 0.3538, "step": 2271 }, { "epoch": 1.9055074084428292, "grad_norm": 0.32628910545476575, "learning_rate": 3.533111227903244e-06, "loss": 0.2941, "step": 2272 }, { "epoch": 1.9063461000838693, "grad_norm": 0.3990783780648975, "learning_rate": 3.528445439348537e-06, "loss": 0.3519, "step": 2273 }, { "epoch": 1.9071847917249092, "grad_norm": 0.36010025529895523, "learning_rate": 3.523781053297478e-06, "loss": 0.3132, "step": 2274 }, { "epoch": 1.908023483365949, "grad_norm": 0.3758247367741781, "learning_rate": 3.5191180741955854e-06, "loss": 0.3355, "step": 2275 }, { "epoch": 1.908862175006989, "grad_norm": 0.3625649862761708, "learning_rate": 3.5144565064870363e-06, "loss": 0.3349, "step": 2276 }, { "epoch": 1.909700866648029, "grad_norm": 0.3616798034948352, "learning_rate": 3.509796354614654e-06, "loss": 0.3371, "step": 2277 }, { "epoch": 1.9105395582890692, "grad_norm": 0.33320953344098553, "learning_rate": 3.5051376230199217e-06, "loss": 0.2975, "step": 2278 }, { "epoch": 1.911378249930109, "grad_norm": 0.33937276767043456, "learning_rate": 3.500480316142968e-06, "loss": 0.3327, "step": 2279 }, { "epoch": 1.912216941571149, "grad_norm": 0.3536358628736911, "learning_rate": 3.495824438422556e-06, "loss": 0.3345, "step": 2280 }, { "epoch": 1.9130556332121889, "grad_norm": 0.3330558334418352, "learning_rate": 3.4911699942960964e-06, "loss": 0.3199, "step": 2281 }, { "epoch": 1.913894324853229, "grad_norm": 0.3621134712000504, "learning_rate": 3.4865169881996264e-06, "loss": 0.307, "step": 2282 }, { "epoch": 1.914733016494269, "grad_norm": 0.3872167742267918, "learning_rate": 3.481865424567816e-06, "loss": 0.3341, "step": 2283 }, { "epoch": 1.915571708135309, "grad_norm": 0.33699209488004256, "learning_rate": 3.4772153078339633e-06, "loss": 0.3242, "step": 2284 }, { "epoch": 1.9164103997763489, "grad_norm": 0.3749348473262737, "learning_rate": 3.472566642429979e-06, "loss": 0.3277, "step": 2285 }, { "epoch": 1.9172490914173888, "grad_norm": 0.36081222482001174, "learning_rate": 3.4679194327863986e-06, "loss": 0.3517, "step": 2286 }, { "epoch": 1.9180877830584289, "grad_norm": 0.3289283299642621, "learning_rate": 3.463273683332364e-06, "loss": 0.3057, "step": 2287 }, { "epoch": 1.918926474699469, "grad_norm": 0.38683664459821504, "learning_rate": 3.458629398495632e-06, "loss": 0.3645, "step": 2288 }, { "epoch": 1.9197651663405089, "grad_norm": 0.35989444111073365, "learning_rate": 3.45398658270256e-06, "loss": 0.3479, "step": 2289 }, { "epoch": 1.9206038579815488, "grad_norm": 0.3304907411445346, "learning_rate": 3.4493452403781023e-06, "loss": 0.317, "step": 2290 }, { "epoch": 1.9214425496225886, "grad_norm": 0.3459306834852621, "learning_rate": 3.444705375945814e-06, "loss": 0.3046, "step": 2291 }, { "epoch": 1.9222812412636288, "grad_norm": 0.3551392026455499, "learning_rate": 3.440066993827839e-06, "loss": 0.3283, "step": 2292 }, { "epoch": 1.9231199329046689, "grad_norm": 0.38159389054370263, "learning_rate": 3.4354300984449086e-06, "loss": 0.3511, "step": 2293 }, { "epoch": 1.9239586245457088, "grad_norm": 0.35530060707911276, "learning_rate": 3.4307946942163397e-06, "loss": 0.3064, "step": 2294 }, { "epoch": 1.9247973161867487, "grad_norm": 0.3838739332131731, "learning_rate": 3.426160785560022e-06, "loss": 0.3298, "step": 2295 }, { "epoch": 1.9256360078277885, "grad_norm": 0.33982861063375847, "learning_rate": 3.4215283768924257e-06, "loss": 0.3465, "step": 2296 }, { "epoch": 1.9264746994688287, "grad_norm": 0.3629891753618276, "learning_rate": 3.416897472628591e-06, "loss": 0.3214, "step": 2297 }, { "epoch": 1.9273133911098688, "grad_norm": 0.38310727113835125, "learning_rate": 3.4122680771821193e-06, "loss": 0.3312, "step": 2298 }, { "epoch": 1.9281520827509087, "grad_norm": 0.3637336882561836, "learning_rate": 3.4076401949651806e-06, "loss": 0.3118, "step": 2299 }, { "epoch": 1.9289907743919485, "grad_norm": 0.3821275916917317, "learning_rate": 3.4030138303884953e-06, "loss": 0.3436, "step": 2300 }, { "epoch": 1.9298294660329884, "grad_norm": 0.3412272836788147, "learning_rate": 3.398388987861344e-06, "loss": 0.3248, "step": 2301 }, { "epoch": 1.9306681576740286, "grad_norm": 0.35538362912787236, "learning_rate": 3.3937656717915566e-06, "loss": 0.3531, "step": 2302 }, { "epoch": 1.9315068493150684, "grad_norm": 0.36989410053948657, "learning_rate": 3.389143886585501e-06, "loss": 0.3448, "step": 2303 }, { "epoch": 1.9323455409561086, "grad_norm": 0.38425906650401476, "learning_rate": 3.3845236366480917e-06, "loss": 0.3534, "step": 2304 }, { "epoch": 1.9331842325971484, "grad_norm": 0.34630658449920604, "learning_rate": 3.3799049263827817e-06, "loss": 0.3345, "step": 2305 }, { "epoch": 1.9340229242381883, "grad_norm": 0.3667312525289303, "learning_rate": 3.375287760191551e-06, "loss": 0.3394, "step": 2306 }, { "epoch": 1.9348616158792284, "grad_norm": 0.3532709105127156, "learning_rate": 3.3706721424749146e-06, "loss": 0.3203, "step": 2307 }, { "epoch": 1.9357003075202683, "grad_norm": 0.3302467633001394, "learning_rate": 3.366058077631903e-06, "loss": 0.3018, "step": 2308 }, { "epoch": 1.9365389991613084, "grad_norm": 0.37214413002916885, "learning_rate": 3.3614455700600746e-06, "loss": 0.347, "step": 2309 }, { "epoch": 1.9373776908023483, "grad_norm": 0.40593515036838607, "learning_rate": 3.3568346241555004e-06, "loss": 0.299, "step": 2310 }, { "epoch": 1.9382163824433882, "grad_norm": 0.3652340287708907, "learning_rate": 3.352225244312762e-06, "loss": 0.3194, "step": 2311 }, { "epoch": 1.9390550740844283, "grad_norm": 0.35427104244941443, "learning_rate": 3.347617434924954e-06, "loss": 0.3339, "step": 2312 }, { "epoch": 1.9398937657254682, "grad_norm": 0.35603438305106433, "learning_rate": 3.343011200383664e-06, "loss": 0.2998, "step": 2313 }, { "epoch": 1.9407324573665083, "grad_norm": 0.382990833682902, "learning_rate": 3.338406545078987e-06, "loss": 0.3363, "step": 2314 }, { "epoch": 1.9415711490075482, "grad_norm": 0.3565409263782093, "learning_rate": 3.333803473399513e-06, "loss": 0.3358, "step": 2315 }, { "epoch": 1.9424098406485881, "grad_norm": 0.35770299902057634, "learning_rate": 3.3292019897323156e-06, "loss": 0.3073, "step": 2316 }, { "epoch": 1.943248532289628, "grad_norm": 0.3967018337256648, "learning_rate": 3.3246020984629625e-06, "loss": 0.339, "step": 2317 }, { "epoch": 1.9440872239306681, "grad_norm": 0.3651096639594851, "learning_rate": 3.3200038039755002e-06, "loss": 0.3502, "step": 2318 }, { "epoch": 1.9449259155717082, "grad_norm": 0.3692825072424445, "learning_rate": 3.3154071106524523e-06, "loss": 0.3101, "step": 2319 }, { "epoch": 1.9457646072127481, "grad_norm": 0.35727947949273997, "learning_rate": 3.3108120228748215e-06, "loss": 0.3538, "step": 2320 }, { "epoch": 1.946603298853788, "grad_norm": 0.35207854950467826, "learning_rate": 3.306218545022072e-06, "loss": 0.332, "step": 2321 }, { "epoch": 1.947441990494828, "grad_norm": 0.3707856332774218, "learning_rate": 3.3016266814721404e-06, "loss": 0.3205, "step": 2322 }, { "epoch": 1.948280682135868, "grad_norm": 0.3286484123416293, "learning_rate": 3.2970364366014234e-06, "loss": 0.3129, "step": 2323 }, { "epoch": 1.9491193737769081, "grad_norm": 0.33014317522970427, "learning_rate": 3.2924478147847727e-06, "loss": 0.3166, "step": 2324 }, { "epoch": 1.949958065417948, "grad_norm": 0.3248466317776124, "learning_rate": 3.2878608203954975e-06, "loss": 0.3082, "step": 2325 }, { "epoch": 1.950796757058988, "grad_norm": 0.35538823077795234, "learning_rate": 3.2832754578053494e-06, "loss": 0.347, "step": 2326 }, { "epoch": 1.9516354487000278, "grad_norm": 0.3282568995189095, "learning_rate": 3.2786917313845295e-06, "loss": 0.3299, "step": 2327 }, { "epoch": 1.952474140341068, "grad_norm": 0.34603549460836913, "learning_rate": 3.2741096455016814e-06, "loss": 0.3257, "step": 2328 }, { "epoch": 1.953312831982108, "grad_norm": 0.33816292442679097, "learning_rate": 3.269529204523877e-06, "loss": 0.3093, "step": 2329 }, { "epoch": 1.954151523623148, "grad_norm": 0.3859343102877558, "learning_rate": 3.264950412816629e-06, "loss": 0.3315, "step": 2330 }, { "epoch": 1.9549902152641878, "grad_norm": 0.3591617971397291, "learning_rate": 3.260373274743875e-06, "loss": 0.2919, "step": 2331 }, { "epoch": 1.9558289069052277, "grad_norm": 0.34769617902597916, "learning_rate": 3.255797794667974e-06, "loss": 0.3479, "step": 2332 }, { "epoch": 1.9566675985462678, "grad_norm": 0.34723246991393053, "learning_rate": 3.2512239769497124e-06, "loss": 0.3221, "step": 2333 }, { "epoch": 1.957506290187308, "grad_norm": 0.33682597723989494, "learning_rate": 3.2466518259482803e-06, "loss": 0.3453, "step": 2334 }, { "epoch": 1.9583449818283478, "grad_norm": 0.3427338161075605, "learning_rate": 3.2420813460212895e-06, "loss": 0.3282, "step": 2335 }, { "epoch": 1.9591836734693877, "grad_norm": 0.3635941931429406, "learning_rate": 3.2375125415247564e-06, "loss": 0.333, "step": 2336 }, { "epoch": 1.9600223651104276, "grad_norm": 0.3311609014620527, "learning_rate": 3.232945416813098e-06, "loss": 0.2954, "step": 2337 }, { "epoch": 1.9608610567514677, "grad_norm": 0.38359497840316303, "learning_rate": 3.228379976239134e-06, "loss": 0.3188, "step": 2338 }, { "epoch": 1.9616997483925078, "grad_norm": 0.37182749843702606, "learning_rate": 3.223816224154078e-06, "loss": 0.3422, "step": 2339 }, { "epoch": 1.9625384400335477, "grad_norm": 0.3477310987985252, "learning_rate": 3.2192541649075306e-06, "loss": 0.3139, "step": 2340 }, { "epoch": 1.9633771316745876, "grad_norm": 0.3581585879634647, "learning_rate": 3.2146938028474832e-06, "loss": 0.3596, "step": 2341 }, { "epoch": 1.9642158233156275, "grad_norm": 0.3631917181371047, "learning_rate": 3.2101351423203087e-06, "loss": 0.3224, "step": 2342 }, { "epoch": 1.9650545149566676, "grad_norm": 0.34342758140522306, "learning_rate": 3.205578187670757e-06, "loss": 0.3057, "step": 2343 }, { "epoch": 1.9658932065977077, "grad_norm": 0.35214799944808384, "learning_rate": 3.201022943241957e-06, "loss": 0.3516, "step": 2344 }, { "epoch": 1.9667318982387476, "grad_norm": 0.32813196803988054, "learning_rate": 3.1964694133753984e-06, "loss": 0.3187, "step": 2345 }, { "epoch": 1.9675705898797875, "grad_norm": 0.33277343808689563, "learning_rate": 3.1919176024109473e-06, "loss": 0.3386, "step": 2346 }, { "epoch": 1.9684092815208274, "grad_norm": 0.3326209986744906, "learning_rate": 3.187367514686821e-06, "loss": 0.3079, "step": 2347 }, { "epoch": 1.9692479731618675, "grad_norm": 0.37672146284151276, "learning_rate": 3.1828191545396037e-06, "loss": 0.375, "step": 2348 }, { "epoch": 1.9700866648029076, "grad_norm": 0.3645513346169228, "learning_rate": 3.178272526304229e-06, "loss": 0.2955, "step": 2349 }, { "epoch": 1.9709253564439475, "grad_norm": 0.34327870742824906, "learning_rate": 3.173727634313979e-06, "loss": 0.3469, "step": 2350 }, { "epoch": 1.9717640480849874, "grad_norm": 0.34554552359980495, "learning_rate": 3.169184482900482e-06, "loss": 0.3063, "step": 2351 }, { "epoch": 1.9726027397260273, "grad_norm": 0.3645015675805596, "learning_rate": 3.164643076393711e-06, "loss": 0.3352, "step": 2352 }, { "epoch": 1.9734414313670674, "grad_norm": 0.335163686923835, "learning_rate": 3.160103419121967e-06, "loss": 0.3101, "step": 2353 }, { "epoch": 1.9742801230081075, "grad_norm": 0.35296601204850736, "learning_rate": 3.155565515411893e-06, "loss": 0.3668, "step": 2354 }, { "epoch": 1.9751188146491474, "grad_norm": 0.33003922470337566, "learning_rate": 3.1510293695884555e-06, "loss": 0.3209, "step": 2355 }, { "epoch": 1.9759575062901873, "grad_norm": 0.36801557442999605, "learning_rate": 3.146494985974948e-06, "loss": 0.3545, "step": 2356 }, { "epoch": 1.9767961979312272, "grad_norm": 0.3948539556419914, "learning_rate": 3.1419623688929858e-06, "loss": 0.3509, "step": 2357 }, { "epoch": 1.9776348895722673, "grad_norm": 0.34225194287681077, "learning_rate": 3.1374315226624937e-06, "loss": 0.3428, "step": 2358 }, { "epoch": 1.9784735812133072, "grad_norm": 0.3243601954423298, "learning_rate": 3.132902451601717e-06, "loss": 0.3396, "step": 2359 }, { "epoch": 1.9793122728543473, "grad_norm": 0.33886051588729954, "learning_rate": 3.128375160027204e-06, "loss": 0.326, "step": 2360 }, { "epoch": 1.9801509644953872, "grad_norm": 0.35663751139070476, "learning_rate": 3.1238496522538085e-06, "loss": 0.3179, "step": 2361 }, { "epoch": 1.980989656136427, "grad_norm": 0.35952942322297926, "learning_rate": 3.1193259325946874e-06, "loss": 0.3324, "step": 2362 }, { "epoch": 1.9818283477774672, "grad_norm": 0.33308520620095294, "learning_rate": 3.114804005361286e-06, "loss": 0.3231, "step": 2363 }, { "epoch": 1.982667039418507, "grad_norm": 0.36628078696693006, "learning_rate": 3.1102838748633475e-06, "loss": 0.3349, "step": 2364 }, { "epoch": 1.9835057310595472, "grad_norm": 0.348304289716979, "learning_rate": 3.1057655454089024e-06, "loss": 0.3152, "step": 2365 }, { "epoch": 1.984344422700587, "grad_norm": 0.38302615263509204, "learning_rate": 3.10124902130426e-06, "loss": 0.3877, "step": 2366 }, { "epoch": 1.985183114341627, "grad_norm": 0.32106572923033616, "learning_rate": 3.096734306854014e-06, "loss": 0.2857, "step": 2367 }, { "epoch": 1.986021805982667, "grad_norm": 0.3530262681095776, "learning_rate": 3.09222140636103e-06, "loss": 0.3147, "step": 2368 }, { "epoch": 1.986860497623707, "grad_norm": 0.3648818851353045, "learning_rate": 3.0877103241264466e-06, "loss": 0.3347, "step": 2369 }, { "epoch": 1.987699189264747, "grad_norm": 0.3334250086476442, "learning_rate": 3.083201064449671e-06, "loss": 0.3371, "step": 2370 }, { "epoch": 1.988537880905787, "grad_norm": 0.32517741296392705, "learning_rate": 3.078693631628369e-06, "loss": 0.3501, "step": 2371 }, { "epoch": 1.9893765725468269, "grad_norm": 0.38052409363425177, "learning_rate": 3.074188029958468e-06, "loss": 0.3256, "step": 2372 }, { "epoch": 1.9902152641878668, "grad_norm": 0.3670650161886805, "learning_rate": 3.0696842637341495e-06, "loss": 0.3232, "step": 2373 }, { "epoch": 1.9910539558289069, "grad_norm": 0.3573195308790297, "learning_rate": 3.065182337247846e-06, "loss": 0.3305, "step": 2374 }, { "epoch": 1.991892647469947, "grad_norm": 0.2881367228050685, "learning_rate": 3.0606822547902394e-06, "loss": 0.2787, "step": 2375 }, { "epoch": 1.9927313391109869, "grad_norm": 0.3612502910433541, "learning_rate": 3.056184020650247e-06, "loss": 0.3446, "step": 2376 }, { "epoch": 1.9935700307520268, "grad_norm": 0.3315548632677151, "learning_rate": 3.0516876391150306e-06, "loss": 0.3486, "step": 2377 }, { "epoch": 1.9944087223930667, "grad_norm": 0.33686623336809385, "learning_rate": 3.0471931144699864e-06, "loss": 0.3172, "step": 2378 }, { "epoch": 1.9952474140341068, "grad_norm": 0.3340153870484378, "learning_rate": 3.042700450998735e-06, "loss": 0.296, "step": 2379 }, { "epoch": 1.9960861056751469, "grad_norm": 0.3535779754901902, "learning_rate": 3.0382096529831307e-06, "loss": 0.2964, "step": 2380 }, { "epoch": 1.9969247973161868, "grad_norm": 0.3398655454003243, "learning_rate": 3.033720724703243e-06, "loss": 0.3148, "step": 2381 }, { "epoch": 1.9977634889572267, "grad_norm": 0.33908352307275835, "learning_rate": 3.0292336704373647e-06, "loss": 0.3531, "step": 2382 }, { "epoch": 1.9986021805982666, "grad_norm": 0.2970207713690136, "learning_rate": 3.0247484944620027e-06, "loss": 0.2903, "step": 2383 }, { "epoch": 1.9994408722393067, "grad_norm": 0.34924487935541987, "learning_rate": 3.0202652010518664e-06, "loss": 0.3605, "step": 2384 }, { "epoch": 2.000279563880347, "grad_norm": 0.6921746662884067, "learning_rate": 3.015783794479881e-06, "loss": 0.4991, "step": 2385 }, { "epoch": 2.0011182555213867, "grad_norm": 0.37075067031480125, "learning_rate": 3.0113042790171637e-06, "loss": 0.2799, "step": 2386 }, { "epoch": 2.0019569471624266, "grad_norm": 0.3579603368766746, "learning_rate": 3.0068266589330374e-06, "loss": 0.2921, "step": 2387 }, { "epoch": 2.0027956388034664, "grad_norm": 0.34709850562827144, "learning_rate": 3.0023509384950166e-06, "loss": 0.3054, "step": 2388 }, { "epoch": 2.003634330444507, "grad_norm": 0.3539656142229123, "learning_rate": 2.9978771219688003e-06, "loss": 0.3127, "step": 2389 }, { "epoch": 2.0044730220855467, "grad_norm": 0.34936560135263317, "learning_rate": 2.993405213618278e-06, "loss": 0.2756, "step": 2390 }, { "epoch": 2.0053117137265866, "grad_norm": 0.3582238370844951, "learning_rate": 2.988935217705521e-06, "loss": 0.3056, "step": 2391 }, { "epoch": 2.0061504053676265, "grad_norm": 0.39383488109019066, "learning_rate": 2.984467138490773e-06, "loss": 0.3319, "step": 2392 }, { "epoch": 2.0069890970086663, "grad_norm": 0.3225337410820237, "learning_rate": 2.980000980232458e-06, "loss": 0.2882, "step": 2393 }, { "epoch": 2.0078277886497062, "grad_norm": 0.39966550506290244, "learning_rate": 2.975536747187161e-06, "loss": 0.3273, "step": 2394 }, { "epoch": 2.0086664802907466, "grad_norm": 0.33512691483052637, "learning_rate": 2.9710744436096384e-06, "loss": 0.2718, "step": 2395 }, { "epoch": 2.0095051719317865, "grad_norm": 0.3304583390879451, "learning_rate": 2.966614073752807e-06, "loss": 0.28, "step": 2396 }, { "epoch": 2.0103438635728264, "grad_norm": 0.38447274058008507, "learning_rate": 2.9621556418677354e-06, "loss": 0.3291, "step": 2397 }, { "epoch": 2.0111825552138662, "grad_norm": 0.3744956884434251, "learning_rate": 2.9576991522036525e-06, "loss": 0.2873, "step": 2398 }, { "epoch": 2.012021246854906, "grad_norm": 0.35504863502459766, "learning_rate": 2.9532446090079302e-06, "loss": 0.3151, "step": 2399 }, { "epoch": 2.0128599384959465, "grad_norm": 0.3405583250998951, "learning_rate": 2.9487920165260887e-06, "loss": 0.3104, "step": 2400 }, { "epoch": 2.0136986301369864, "grad_norm": 0.36392955029402413, "learning_rate": 2.9443413790017904e-06, "loss": 0.3157, "step": 2401 }, { "epoch": 2.0145373217780262, "grad_norm": 0.33260151803447635, "learning_rate": 2.9398927006768274e-06, "loss": 0.2734, "step": 2402 }, { "epoch": 2.015376013419066, "grad_norm": 0.35100433610350534, "learning_rate": 2.9354459857911323e-06, "loss": 0.2999, "step": 2403 }, { "epoch": 2.016214705060106, "grad_norm": 0.3916521726801628, "learning_rate": 2.9310012385827625e-06, "loss": 0.3027, "step": 2404 }, { "epoch": 2.0170533967011464, "grad_norm": 0.35451883994477873, "learning_rate": 2.9265584632879e-06, "loss": 0.3031, "step": 2405 }, { "epoch": 2.0178920883421863, "grad_norm": 0.33933988657831393, "learning_rate": 2.9221176641408515e-06, "loss": 0.2948, "step": 2406 }, { "epoch": 2.018730779983226, "grad_norm": 0.3571393067226839, "learning_rate": 2.9176788453740314e-06, "loss": 0.315, "step": 2407 }, { "epoch": 2.019569471624266, "grad_norm": 0.32047829135494416, "learning_rate": 2.913242011217977e-06, "loss": 0.29, "step": 2408 }, { "epoch": 2.020408163265306, "grad_norm": 0.31706836871080574, "learning_rate": 2.9088071659013295e-06, "loss": 0.3036, "step": 2409 }, { "epoch": 2.0212468549063463, "grad_norm": 0.35265559648892336, "learning_rate": 2.9043743136508313e-06, "loss": 0.316, "step": 2410 }, { "epoch": 2.022085546547386, "grad_norm": 0.330146501587056, "learning_rate": 2.8999434586913316e-06, "loss": 0.2932, "step": 2411 }, { "epoch": 2.022924238188426, "grad_norm": 0.347966590554414, "learning_rate": 2.8955146052457696e-06, "loss": 0.3216, "step": 2412 }, { "epoch": 2.023762929829466, "grad_norm": 0.3202787490350012, "learning_rate": 2.891087757535182e-06, "loss": 0.2756, "step": 2413 }, { "epoch": 2.024601621470506, "grad_norm": 0.3540279828278088, "learning_rate": 2.8866629197786944e-06, "loss": 0.2992, "step": 2414 }, { "epoch": 2.025440313111546, "grad_norm": 0.3779993731119467, "learning_rate": 2.8822400961935092e-06, "loss": 0.3149, "step": 2415 }, { "epoch": 2.026279004752586, "grad_norm": 0.36858668460724564, "learning_rate": 2.877819290994918e-06, "loss": 0.3188, "step": 2416 }, { "epoch": 2.027117696393626, "grad_norm": 0.3305206199046558, "learning_rate": 2.8734005083962857e-06, "loss": 0.2807, "step": 2417 }, { "epoch": 2.027956388034666, "grad_norm": 0.3691025976249277, "learning_rate": 2.868983752609046e-06, "loss": 0.302, "step": 2418 }, { "epoch": 2.0287950796757057, "grad_norm": 0.3783358507014406, "learning_rate": 2.8645690278427042e-06, "loss": 0.2922, "step": 2419 }, { "epoch": 2.029633771316746, "grad_norm": 0.3798587447266368, "learning_rate": 2.8601563383048314e-06, "loss": 0.3013, "step": 2420 }, { "epoch": 2.030472462957786, "grad_norm": 0.36398310053501914, "learning_rate": 2.8557456882010557e-06, "loss": 0.3355, "step": 2421 }, { "epoch": 2.031311154598826, "grad_norm": 0.32304069311284433, "learning_rate": 2.851337081735066e-06, "loss": 0.2602, "step": 2422 }, { "epoch": 2.0321498462398657, "grad_norm": 0.40037797531671315, "learning_rate": 2.846930523108595e-06, "loss": 0.3112, "step": 2423 }, { "epoch": 2.0329885378809056, "grad_norm": 0.36171126748768445, "learning_rate": 2.842526016521433e-06, "loss": 0.2967, "step": 2424 }, { "epoch": 2.033827229521946, "grad_norm": 0.36626100926655675, "learning_rate": 2.838123566171408e-06, "loss": 0.3378, "step": 2425 }, { "epoch": 2.034665921162986, "grad_norm": 0.3432155683061323, "learning_rate": 2.8337231762543905e-06, "loss": 0.3014, "step": 2426 }, { "epoch": 2.0355046128040257, "grad_norm": 0.4009066954709733, "learning_rate": 2.8293248509642914e-06, "loss": 0.3316, "step": 2427 }, { "epoch": 2.0363433044450656, "grad_norm": 0.33155358741355, "learning_rate": 2.8249285944930448e-06, "loss": 0.2804, "step": 2428 }, { "epoch": 2.0371819960861055, "grad_norm": 0.3198485899132724, "learning_rate": 2.820534411030621e-06, "loss": 0.292, "step": 2429 }, { "epoch": 2.038020687727146, "grad_norm": 0.3884165122529188, "learning_rate": 2.816142304765013e-06, "loss": 0.3555, "step": 2430 }, { "epoch": 2.0388593793681857, "grad_norm": 0.33174398572369357, "learning_rate": 2.811752279882228e-06, "loss": 0.2836, "step": 2431 }, { "epoch": 2.0396980710092256, "grad_norm": 0.35534598128753203, "learning_rate": 2.8073643405662985e-06, "loss": 0.3145, "step": 2432 }, { "epoch": 2.0405367626502655, "grad_norm": 0.3546914275999681, "learning_rate": 2.802978490999264e-06, "loss": 0.3166, "step": 2433 }, { "epoch": 2.0413754542913054, "grad_norm": 0.32877927856474554, "learning_rate": 2.798594735361173e-06, "loss": 0.2862, "step": 2434 }, { "epoch": 2.0422141459323457, "grad_norm": 0.3422782702353696, "learning_rate": 2.7942130778300815e-06, "loss": 0.2982, "step": 2435 }, { "epoch": 2.0430528375733856, "grad_norm": 0.36095438138488906, "learning_rate": 2.7898335225820376e-06, "loss": 0.3358, "step": 2436 }, { "epoch": 2.0438915292144255, "grad_norm": 0.3569336358406709, "learning_rate": 2.7854560737910975e-06, "loss": 0.3216, "step": 2437 }, { "epoch": 2.0447302208554654, "grad_norm": 0.3736331590313262, "learning_rate": 2.7810807356292972e-06, "loss": 0.3413, "step": 2438 }, { "epoch": 2.0455689124965053, "grad_norm": 0.31732102583862376, "learning_rate": 2.7767075122666707e-06, "loss": 0.2701, "step": 2439 }, { "epoch": 2.0464076041375456, "grad_norm": 0.3434643632454951, "learning_rate": 2.772336407871236e-06, "loss": 0.2938, "step": 2440 }, { "epoch": 2.0472462957785855, "grad_norm": 0.32494337108776966, "learning_rate": 2.767967426608983e-06, "loss": 0.2824, "step": 2441 }, { "epoch": 2.0480849874196254, "grad_norm": 0.34571160289971087, "learning_rate": 2.763600572643887e-06, "loss": 0.3235, "step": 2442 }, { "epoch": 2.0489236790606653, "grad_norm": 0.35103038239070394, "learning_rate": 2.7592358501378936e-06, "loss": 0.2897, "step": 2443 }, { "epoch": 2.049762370701705, "grad_norm": 0.37535624703243947, "learning_rate": 2.7548732632509134e-06, "loss": 0.2956, "step": 2444 }, { "epoch": 2.0506010623427455, "grad_norm": 0.3587939838398054, "learning_rate": 2.7505128161408252e-06, "loss": 0.3287, "step": 2445 }, { "epoch": 2.0514397539837854, "grad_norm": 0.34719985099512585, "learning_rate": 2.746154512963467e-06, "loss": 0.2871, "step": 2446 }, { "epoch": 2.0522784456248253, "grad_norm": 0.3096035647291714, "learning_rate": 2.7417983578726354e-06, "loss": 0.2876, "step": 2447 }, { "epoch": 2.053117137265865, "grad_norm": 0.34612273967850415, "learning_rate": 2.7374443550200792e-06, "loss": 0.2953, "step": 2448 }, { "epoch": 2.053955828906905, "grad_norm": 0.3910656590664713, "learning_rate": 2.7330925085554906e-06, "loss": 0.3127, "step": 2449 }, { "epoch": 2.0547945205479454, "grad_norm": 0.35447661654399015, "learning_rate": 2.7287428226265155e-06, "loss": 0.2947, "step": 2450 }, { "epoch": 2.0556332121889853, "grad_norm": 0.3388731577804611, "learning_rate": 2.724395301378733e-06, "loss": 0.3274, "step": 2451 }, { "epoch": 2.056471903830025, "grad_norm": 0.3381888589125051, "learning_rate": 2.7200499489556625e-06, "loss": 0.2714, "step": 2452 }, { "epoch": 2.057310595471065, "grad_norm": 0.3511994456210853, "learning_rate": 2.7157067694987592e-06, "loss": 0.3204, "step": 2453 }, { "epoch": 2.058149287112105, "grad_norm": 0.3539523331248711, "learning_rate": 2.711365767147399e-06, "loss": 0.3053, "step": 2454 }, { "epoch": 2.058987978753145, "grad_norm": 0.3331930987718739, "learning_rate": 2.7070269460388913e-06, "loss": 0.3147, "step": 2455 }, { "epoch": 2.059826670394185, "grad_norm": 0.33429220737423837, "learning_rate": 2.702690310308463e-06, "loss": 0.2856, "step": 2456 }, { "epoch": 2.060665362035225, "grad_norm": 0.34076490604220266, "learning_rate": 2.698355864089258e-06, "loss": 0.2917, "step": 2457 }, { "epoch": 2.061504053676265, "grad_norm": 0.3708376764742294, "learning_rate": 2.6940236115123357e-06, "loss": 0.292, "step": 2458 }, { "epoch": 2.062342745317305, "grad_norm": 0.37198737098444834, "learning_rate": 2.6896935567066594e-06, "loss": 0.3538, "step": 2459 }, { "epoch": 2.0631814369583448, "grad_norm": 0.3255854642927888, "learning_rate": 2.6853657037991044e-06, "loss": 0.2998, "step": 2460 }, { "epoch": 2.064020128599385, "grad_norm": 0.3677031713646057, "learning_rate": 2.681040056914444e-06, "loss": 0.3, "step": 2461 }, { "epoch": 2.064858820240425, "grad_norm": 0.3657866142944497, "learning_rate": 2.676716620175347e-06, "loss": 0.3012, "step": 2462 }, { "epoch": 2.065697511881465, "grad_norm": 0.3419436947395966, "learning_rate": 2.672395397702382e-06, "loss": 0.2913, "step": 2463 }, { "epoch": 2.0665362035225048, "grad_norm": 0.3652430911355143, "learning_rate": 2.668076393613999e-06, "loss": 0.3208, "step": 2464 }, { "epoch": 2.0673748951635447, "grad_norm": 0.35252620026992154, "learning_rate": 2.6637596120265408e-06, "loss": 0.2954, "step": 2465 }, { "epoch": 2.068213586804585, "grad_norm": 0.3361282709267618, "learning_rate": 2.6594450570542298e-06, "loss": 0.2844, "step": 2466 }, { "epoch": 2.069052278445625, "grad_norm": 0.3627196508926934, "learning_rate": 2.655132732809163e-06, "loss": 0.3373, "step": 2467 }, { "epoch": 2.069890970086665, "grad_norm": 0.36296991325280054, "learning_rate": 2.6508226434013155e-06, "loss": 0.31, "step": 2468 }, { "epoch": 2.0707296617277047, "grad_norm": 0.34351656814753356, "learning_rate": 2.6465147929385314e-06, "loss": 0.265, "step": 2469 }, { "epoch": 2.0715683533687446, "grad_norm": 0.35964155668662695, "learning_rate": 2.6422091855265195e-06, "loss": 0.3336, "step": 2470 }, { "epoch": 2.072407045009785, "grad_norm": 0.37167583324179204, "learning_rate": 2.637905825268856e-06, "loss": 0.3144, "step": 2471 }, { "epoch": 2.073245736650825, "grad_norm": 0.3400911957985594, "learning_rate": 2.6336047162669647e-06, "loss": 0.2971, "step": 2472 }, { "epoch": 2.0740844282918647, "grad_norm": 0.35563401845081904, "learning_rate": 2.629305862620134e-06, "loss": 0.3019, "step": 2473 }, { "epoch": 2.0749231199329046, "grad_norm": 0.3492508117483212, "learning_rate": 2.6250092684255e-06, "loss": 0.3019, "step": 2474 }, { "epoch": 2.0757618115739445, "grad_norm": 0.3575578228642408, "learning_rate": 2.6207149377780405e-06, "loss": 0.2893, "step": 2475 }, { "epoch": 2.076600503214985, "grad_norm": 0.3219697945347358, "learning_rate": 2.616422874770585e-06, "loss": 0.2756, "step": 2476 }, { "epoch": 2.0774391948560247, "grad_norm": 0.32813463995283415, "learning_rate": 2.612133083493792e-06, "loss": 0.2876, "step": 2477 }, { "epoch": 2.0782778864970646, "grad_norm": 0.3425407922773528, "learning_rate": 2.6078455680361602e-06, "loss": 0.3015, "step": 2478 }, { "epoch": 2.0791165781381045, "grad_norm": 0.3677626222565688, "learning_rate": 2.603560332484023e-06, "loss": 0.3332, "step": 2479 }, { "epoch": 2.0799552697791444, "grad_norm": 0.35750575382154465, "learning_rate": 2.599277380921531e-06, "loss": 0.294, "step": 2480 }, { "epoch": 2.0807939614201847, "grad_norm": 0.33886798179905675, "learning_rate": 2.5949967174306667e-06, "loss": 0.2928, "step": 2481 }, { "epoch": 2.0816326530612246, "grad_norm": 0.3439006834987065, "learning_rate": 2.5907183460912266e-06, "loss": 0.2924, "step": 2482 }, { "epoch": 2.0824713447022645, "grad_norm": 0.3554193007828603, "learning_rate": 2.5864422709808266e-06, "loss": 0.307, "step": 2483 }, { "epoch": 2.0833100363433044, "grad_norm": 0.33646669380712163, "learning_rate": 2.582168496174893e-06, "loss": 0.2934, "step": 2484 }, { "epoch": 2.0841487279843443, "grad_norm": 0.34325969260750056, "learning_rate": 2.577897025746655e-06, "loss": 0.3153, "step": 2485 }, { "epoch": 2.0849874196253846, "grad_norm": 0.3621508106079461, "learning_rate": 2.5736278637671524e-06, "loss": 0.2927, "step": 2486 }, { "epoch": 2.0858261112664245, "grad_norm": 0.3320518423356394, "learning_rate": 2.569361014305223e-06, "loss": 0.2971, "step": 2487 }, { "epoch": 2.0866648029074644, "grad_norm": 0.36396239310962275, "learning_rate": 2.565096481427496e-06, "loss": 0.2878, "step": 2488 }, { "epoch": 2.0875034945485043, "grad_norm": 0.3102001662604626, "learning_rate": 2.5608342691983982e-06, "loss": 0.2492, "step": 2489 }, { "epoch": 2.088342186189544, "grad_norm": 0.32624143580327414, "learning_rate": 2.5565743816801445e-06, "loss": 0.2894, "step": 2490 }, { "epoch": 2.0891808778305845, "grad_norm": 0.3695378489640502, "learning_rate": 2.552316822932729e-06, "loss": 0.3078, "step": 2491 }, { "epoch": 2.0900195694716244, "grad_norm": 0.37140291862151004, "learning_rate": 2.548061597013932e-06, "loss": 0.343, "step": 2492 }, { "epoch": 2.0908582611126643, "grad_norm": 0.32766989386616857, "learning_rate": 2.543808707979306e-06, "loss": 0.2971, "step": 2493 }, { "epoch": 2.091696952753704, "grad_norm": 0.3498482407996087, "learning_rate": 2.5395581598821807e-06, "loss": 0.3209, "step": 2494 }, { "epoch": 2.092535644394744, "grad_norm": 0.33621993540187584, "learning_rate": 2.5353099567736496e-06, "loss": 0.294, "step": 2495 }, { "epoch": 2.0933743360357844, "grad_norm": 0.3460845280365644, "learning_rate": 2.5310641027025768e-06, "loss": 0.3035, "step": 2496 }, { "epoch": 2.0942130276768243, "grad_norm": 0.3632168870570725, "learning_rate": 2.5268206017155855e-06, "loss": 0.2991, "step": 2497 }, { "epoch": 2.095051719317864, "grad_norm": 0.3566436711132335, "learning_rate": 2.5225794578570518e-06, "loss": 0.2961, "step": 2498 }, { "epoch": 2.095890410958904, "grad_norm": 0.37027716953056083, "learning_rate": 2.5183406751691113e-06, "loss": 0.3007, "step": 2499 }, { "epoch": 2.096729102599944, "grad_norm": 0.32308202857612534, "learning_rate": 2.5141042576916484e-06, "loss": 0.2889, "step": 2500 }, { "epoch": 2.0975677942409843, "grad_norm": 0.35868301812155223, "learning_rate": 2.509870209462289e-06, "loss": 0.3237, "step": 2501 }, { "epoch": 2.098406485882024, "grad_norm": 0.35020163912145763, "learning_rate": 2.5056385345164047e-06, "loss": 0.3006, "step": 2502 }, { "epoch": 2.099245177523064, "grad_norm": 0.3435486214545502, "learning_rate": 2.5014092368871064e-06, "loss": 0.3258, "step": 2503 }, { "epoch": 2.100083869164104, "grad_norm": 0.32036865647783436, "learning_rate": 2.4971823206052332e-06, "loss": 0.26, "step": 2504 }, { "epoch": 2.100922560805144, "grad_norm": 0.3855521960192477, "learning_rate": 2.49295778969936e-06, "loss": 0.3291, "step": 2505 }, { "epoch": 2.1017612524461837, "grad_norm": 0.36761903141715097, "learning_rate": 2.4887356481957876e-06, "loss": 0.3429, "step": 2506 }, { "epoch": 2.102599944087224, "grad_norm": 0.3255012411738421, "learning_rate": 2.484515900118538e-06, "loss": 0.2683, "step": 2507 }, { "epoch": 2.103438635728264, "grad_norm": 0.34909869555434175, "learning_rate": 2.4802985494893554e-06, "loss": 0.2898, "step": 2508 }, { "epoch": 2.104277327369304, "grad_norm": 0.3460384047628472, "learning_rate": 2.4760836003276918e-06, "loss": 0.3083, "step": 2509 }, { "epoch": 2.1051160190103437, "grad_norm": 0.35899262965051937, "learning_rate": 2.4718710566507187e-06, "loss": 0.3043, "step": 2510 }, { "epoch": 2.1059547106513836, "grad_norm": 0.3479780551784724, "learning_rate": 2.4676609224733085e-06, "loss": 0.3106, "step": 2511 }, { "epoch": 2.106793402292424, "grad_norm": 0.35535918148550266, "learning_rate": 2.4634532018080404e-06, "loss": 0.3059, "step": 2512 }, { "epoch": 2.107632093933464, "grad_norm": 0.3418167653876772, "learning_rate": 2.4592478986651957e-06, "loss": 0.31, "step": 2513 }, { "epoch": 2.1084707855745037, "grad_norm": 0.32775354540922375, "learning_rate": 2.455045017052745e-06, "loss": 0.3183, "step": 2514 }, { "epoch": 2.1093094772155436, "grad_norm": 0.3210054712805218, "learning_rate": 2.4508445609763564e-06, "loss": 0.2631, "step": 2515 }, { "epoch": 2.1101481688565835, "grad_norm": 0.3776073631223308, "learning_rate": 2.446646534439387e-06, "loss": 0.3042, "step": 2516 }, { "epoch": 2.110986860497624, "grad_norm": 0.3507860003400942, "learning_rate": 2.4424509414428717e-06, "loss": 0.3021, "step": 2517 }, { "epoch": 2.1118255521386637, "grad_norm": 0.3411071692744266, "learning_rate": 2.4382577859855327e-06, "loss": 0.297, "step": 2518 }, { "epoch": 2.1126642437797036, "grad_norm": 0.3686840984799874, "learning_rate": 2.4340670720637664e-06, "loss": 0.3027, "step": 2519 }, { "epoch": 2.1135029354207435, "grad_norm": 0.34545731277530495, "learning_rate": 2.429878803671643e-06, "loss": 0.2942, "step": 2520 }, { "epoch": 2.1143416270617834, "grad_norm": 0.3299535277438065, "learning_rate": 2.425692984800903e-06, "loss": 0.29, "step": 2521 }, { "epoch": 2.1151803187028237, "grad_norm": 0.3225092938043728, "learning_rate": 2.421509619440947e-06, "loss": 0.2811, "step": 2522 }, { "epoch": 2.1160190103438636, "grad_norm": 0.3595603796768058, "learning_rate": 2.417328711578845e-06, "loss": 0.3298, "step": 2523 }, { "epoch": 2.1168577019849035, "grad_norm": 0.3188419508021012, "learning_rate": 2.4131502651993174e-06, "loss": 0.2756, "step": 2524 }, { "epoch": 2.1176963936259434, "grad_norm": 0.3807671326165429, "learning_rate": 2.408974284284743e-06, "loss": 0.3138, "step": 2525 }, { "epoch": 2.1185350852669833, "grad_norm": 0.3667925196487643, "learning_rate": 2.4048007728151524e-06, "loss": 0.3029, "step": 2526 }, { "epoch": 2.1193737769080236, "grad_norm": 0.3430928435244441, "learning_rate": 2.400629734768216e-06, "loss": 0.2657, "step": 2527 }, { "epoch": 2.1202124685490635, "grad_norm": 0.35525878073217065, "learning_rate": 2.396461174119254e-06, "loss": 0.3053, "step": 2528 }, { "epoch": 2.1210511601901034, "grad_norm": 0.31907387343474813, "learning_rate": 2.392295094841223e-06, "loss": 0.2709, "step": 2529 }, { "epoch": 2.1218898518311433, "grad_norm": 0.366210244845289, "learning_rate": 2.3881315009047108e-06, "loss": 0.3318, "step": 2530 }, { "epoch": 2.122728543472183, "grad_norm": 0.35945645221040107, "learning_rate": 2.383970396277941e-06, "loss": 0.3014, "step": 2531 }, { "epoch": 2.1235672351132235, "grad_norm": 0.3420153519702846, "learning_rate": 2.379811784926764e-06, "loss": 0.3021, "step": 2532 }, { "epoch": 2.1244059267542634, "grad_norm": 0.32959153468741464, "learning_rate": 2.3756556708146537e-06, "loss": 0.2841, "step": 2533 }, { "epoch": 2.1252446183953033, "grad_norm": 0.3454164486862934, "learning_rate": 2.3715020579027053e-06, "loss": 0.326, "step": 2534 }, { "epoch": 2.126083310036343, "grad_norm": 0.3196778196660379, "learning_rate": 2.3673509501496243e-06, "loss": 0.288, "step": 2535 }, { "epoch": 2.126922001677383, "grad_norm": 0.3640564880672926, "learning_rate": 2.363202351511737e-06, "loss": 0.3662, "step": 2536 }, { "epoch": 2.1277606933184234, "grad_norm": 0.31207826546965844, "learning_rate": 2.35905626594297e-06, "loss": 0.2359, "step": 2537 }, { "epoch": 2.1285993849594633, "grad_norm": 0.3720691370158964, "learning_rate": 2.3549126973948604e-06, "loss": 0.3316, "step": 2538 }, { "epoch": 2.129438076600503, "grad_norm": 0.305762628356464, "learning_rate": 2.3507716498165478e-06, "loss": 0.2684, "step": 2539 }, { "epoch": 2.130276768241543, "grad_norm": 0.32414498353158117, "learning_rate": 2.346633127154761e-06, "loss": 0.2825, "step": 2540 }, { "epoch": 2.131115459882583, "grad_norm": 0.3292609839358416, "learning_rate": 2.3424971333538303e-06, "loss": 0.3051, "step": 2541 }, { "epoch": 2.1319541515236233, "grad_norm": 0.36473558209976215, "learning_rate": 2.3383636723556753e-06, "loss": 0.2903, "step": 2542 }, { "epoch": 2.132792843164663, "grad_norm": 0.35827579210705823, "learning_rate": 2.3342327480997947e-06, "loss": 0.3166, "step": 2543 }, { "epoch": 2.133631534805703, "grad_norm": 0.3497744213039817, "learning_rate": 2.330104364523276e-06, "loss": 0.3049, "step": 2544 }, { "epoch": 2.134470226446743, "grad_norm": 0.3435444056484605, "learning_rate": 2.3259785255607843e-06, "loss": 0.2752, "step": 2545 }, { "epoch": 2.135308918087783, "grad_norm": 0.3714294021959114, "learning_rate": 2.3218552351445573e-06, "loss": 0.3114, "step": 2546 }, { "epoch": 2.1361476097288232, "grad_norm": 0.3436031284774512, "learning_rate": 2.317734497204408e-06, "loss": 0.3302, "step": 2547 }, { "epoch": 2.136986301369863, "grad_norm": 0.35290678173653933, "learning_rate": 2.3136163156677095e-06, "loss": 0.3017, "step": 2548 }, { "epoch": 2.137824993010903, "grad_norm": 0.3697933342453876, "learning_rate": 2.309500694459407e-06, "loss": 0.3042, "step": 2549 }, { "epoch": 2.138663684651943, "grad_norm": 0.36161542857252704, "learning_rate": 2.305387637501996e-06, "loss": 0.3285, "step": 2550 }, { "epoch": 2.139502376292983, "grad_norm": 0.3568425681738327, "learning_rate": 2.3012771487155366e-06, "loss": 0.2916, "step": 2551 }, { "epoch": 2.140341067934023, "grad_norm": 0.33133100865710163, "learning_rate": 2.297169232017639e-06, "loss": 0.3062, "step": 2552 }, { "epoch": 2.141179759575063, "grad_norm": 0.32603297442278995, "learning_rate": 2.2930638913234583e-06, "loss": 0.279, "step": 2553 }, { "epoch": 2.142018451216103, "grad_norm": 0.3469880227614821, "learning_rate": 2.288961130545697e-06, "loss": 0.2912, "step": 2554 }, { "epoch": 2.142857142857143, "grad_norm": 0.35373224933607167, "learning_rate": 2.2848609535946002e-06, "loss": 0.2984, "step": 2555 }, { "epoch": 2.1436958344981827, "grad_norm": 0.3368262502458966, "learning_rate": 2.2807633643779486e-06, "loss": 0.2979, "step": 2556 }, { "epoch": 2.144534526139223, "grad_norm": 0.34669051654304167, "learning_rate": 2.2766683668010594e-06, "loss": 0.3021, "step": 2557 }, { "epoch": 2.145373217780263, "grad_norm": 0.33115845858067255, "learning_rate": 2.272575964766773e-06, "loss": 0.3037, "step": 2558 }, { "epoch": 2.146211909421303, "grad_norm": 0.337034195811845, "learning_rate": 2.2684861621754624e-06, "loss": 0.3093, "step": 2559 }, { "epoch": 2.1470506010623427, "grad_norm": 0.33584050006006994, "learning_rate": 2.2643989629250228e-06, "loss": 0.2817, "step": 2560 }, { "epoch": 2.1478892927033826, "grad_norm": 0.35417922719695605, "learning_rate": 2.2603143709108633e-06, "loss": 0.3049, "step": 2561 }, { "epoch": 2.148727984344423, "grad_norm": 0.3574830021558373, "learning_rate": 2.2562323900259155e-06, "loss": 0.2909, "step": 2562 }, { "epoch": 2.149566675985463, "grad_norm": 0.33935352035918026, "learning_rate": 2.2521530241606136e-06, "loss": 0.3343, "step": 2563 }, { "epoch": 2.1504053676265027, "grad_norm": 0.30796339214612817, "learning_rate": 2.2480762772029075e-06, "loss": 0.2825, "step": 2564 }, { "epoch": 2.1512440592675426, "grad_norm": 0.3707346370830002, "learning_rate": 2.244002153038248e-06, "loss": 0.3038, "step": 2565 }, { "epoch": 2.1520827509085825, "grad_norm": 0.35006031776081614, "learning_rate": 2.2399306555495824e-06, "loss": 0.2814, "step": 2566 }, { "epoch": 2.152921442549623, "grad_norm": 0.4049757333595306, "learning_rate": 2.235861788617361e-06, "loss": 0.341, "step": 2567 }, { "epoch": 2.1537601341906627, "grad_norm": 0.3470079671663448, "learning_rate": 2.231795556119524e-06, "loss": 0.2829, "step": 2568 }, { "epoch": 2.1545988258317026, "grad_norm": 0.31008322405268546, "learning_rate": 2.2277319619314996e-06, "loss": 0.2755, "step": 2569 }, { "epoch": 2.1554375174727425, "grad_norm": 0.3436366859646699, "learning_rate": 2.223671009926206e-06, "loss": 0.3193, "step": 2570 }, { "epoch": 2.1562762091137824, "grad_norm": 0.35845564192045065, "learning_rate": 2.2196127039740357e-06, "loss": 0.2908, "step": 2571 }, { "epoch": 2.1571149007548227, "grad_norm": 0.35765648348031615, "learning_rate": 2.2155570479428656e-06, "loss": 0.3023, "step": 2572 }, { "epoch": 2.1579535923958626, "grad_norm": 0.38976692067640983, "learning_rate": 2.2115040456980463e-06, "loss": 0.2899, "step": 2573 }, { "epoch": 2.1587922840369025, "grad_norm": 0.33394615767010594, "learning_rate": 2.2074537011023945e-06, "loss": 0.2998, "step": 2574 }, { "epoch": 2.1596309756779424, "grad_norm": 0.3545838541777967, "learning_rate": 2.2034060180162016e-06, "loss": 0.2785, "step": 2575 }, { "epoch": 2.1604696673189823, "grad_norm": 0.3293053174662852, "learning_rate": 2.1993610002972133e-06, "loss": 0.3102, "step": 2576 }, { "epoch": 2.161308358960022, "grad_norm": 0.33008480227080483, "learning_rate": 2.1953186518006418e-06, "loss": 0.2892, "step": 2577 }, { "epoch": 2.1621470506010625, "grad_norm": 0.32578418976293905, "learning_rate": 2.1912789763791566e-06, "loss": 0.2902, "step": 2578 }, { "epoch": 2.1629857422421024, "grad_norm": 0.38027073667175215, "learning_rate": 2.1872419778828706e-06, "loss": 0.3141, "step": 2579 }, { "epoch": 2.1638244338831423, "grad_norm": 0.3492394498942468, "learning_rate": 2.183207660159354e-06, "loss": 0.3153, "step": 2580 }, { "epoch": 2.164663125524182, "grad_norm": 0.34439883267861376, "learning_rate": 2.1791760270536182e-06, "loss": 0.297, "step": 2581 }, { "epoch": 2.165501817165222, "grad_norm": 0.3375320622331812, "learning_rate": 2.1751470824081183e-06, "loss": 0.2876, "step": 2582 }, { "epoch": 2.1663405088062624, "grad_norm": 0.3493804928585691, "learning_rate": 2.1711208300627463e-06, "loss": 0.3128, "step": 2583 }, { "epoch": 2.1671792004473023, "grad_norm": 0.33693649266945186, "learning_rate": 2.167097273854824e-06, "loss": 0.3046, "step": 2584 }, { "epoch": 2.168017892088342, "grad_norm": 0.34568134948771995, "learning_rate": 2.1630764176191095e-06, "loss": 0.3435, "step": 2585 }, { "epoch": 2.168856583729382, "grad_norm": 0.3132069415407871, "learning_rate": 2.1590582651877863e-06, "loss": 0.2644, "step": 2586 }, { "epoch": 2.169695275370422, "grad_norm": 0.3628347418067929, "learning_rate": 2.155042820390457e-06, "loss": 0.3176, "step": 2587 }, { "epoch": 2.1705339670114623, "grad_norm": 0.34250206213336615, "learning_rate": 2.1510300870541493e-06, "loss": 0.3015, "step": 2588 }, { "epoch": 2.171372658652502, "grad_norm": 0.33354262491864733, "learning_rate": 2.147020069003301e-06, "loss": 0.2552, "step": 2589 }, { "epoch": 2.172211350293542, "grad_norm": 0.34935388377346827, "learning_rate": 2.143012770059767e-06, "loss": 0.3166, "step": 2590 }, { "epoch": 2.173050041934582, "grad_norm": 0.3465046148871578, "learning_rate": 2.139008194042809e-06, "loss": 0.3182, "step": 2591 }, { "epoch": 2.173888733575622, "grad_norm": 0.35052105356731794, "learning_rate": 2.135006344769091e-06, "loss": 0.2835, "step": 2592 }, { "epoch": 2.174727425216662, "grad_norm": 0.32675874321797854, "learning_rate": 2.1310072260526814e-06, "loss": 0.2768, "step": 2593 }, { "epoch": 2.175566116857702, "grad_norm": 0.35729428032393723, "learning_rate": 2.127010841705046e-06, "loss": 0.3386, "step": 2594 }, { "epoch": 2.176404808498742, "grad_norm": 0.3048915290305508, "learning_rate": 2.1230171955350424e-06, "loss": 0.2628, "step": 2595 }, { "epoch": 2.177243500139782, "grad_norm": 0.3536630602888376, "learning_rate": 2.1190262913489222e-06, "loss": 0.341, "step": 2596 }, { "epoch": 2.1780821917808217, "grad_norm": 0.3608495941505196, "learning_rate": 2.1150381329503184e-06, "loss": 0.2607, "step": 2597 }, { "epoch": 2.178920883421862, "grad_norm": 0.35311042935395265, "learning_rate": 2.11105272414025e-06, "loss": 0.3281, "step": 2598 }, { "epoch": 2.179759575062902, "grad_norm": 0.3407815753225343, "learning_rate": 2.107070068717119e-06, "loss": 0.2836, "step": 2599 }, { "epoch": 2.180598266703942, "grad_norm": 0.3458583491886272, "learning_rate": 2.1030901704766944e-06, "loss": 0.318, "step": 2600 }, { "epoch": 2.1814369583449817, "grad_norm": 0.34593152967480734, "learning_rate": 2.0991130332121272e-06, "loss": 0.3011, "step": 2601 }, { "epoch": 2.1822756499860216, "grad_norm": 0.3728615656452443, "learning_rate": 2.0951386607139286e-06, "loss": 0.3201, "step": 2602 }, { "epoch": 2.183114341627062, "grad_norm": 0.40220300380308927, "learning_rate": 2.091167056769979e-06, "loss": 0.3189, "step": 2603 }, { "epoch": 2.183953033268102, "grad_norm": 0.32814666239797075, "learning_rate": 2.0871982251655216e-06, "loss": 0.2724, "step": 2604 }, { "epoch": 2.1847917249091418, "grad_norm": 0.38215821331363264, "learning_rate": 2.083232169683154e-06, "loss": 0.3552, "step": 2605 }, { "epoch": 2.1856304165501816, "grad_norm": 0.3191765983311514, "learning_rate": 2.0792688941028304e-06, "loss": 0.2717, "step": 2606 }, { "epoch": 2.1864691081912215, "grad_norm": 0.33449243237927306, "learning_rate": 2.0753084022018556e-06, "loss": 0.3266, "step": 2607 }, { "epoch": 2.187307799832262, "grad_norm": 0.30806643392093014, "learning_rate": 2.071350697754877e-06, "loss": 0.2701, "step": 2608 }, { "epoch": 2.1881464914733018, "grad_norm": 0.37279372491161145, "learning_rate": 2.0673957845338915e-06, "loss": 0.3136, "step": 2609 }, { "epoch": 2.1889851831143416, "grad_norm": 0.33709940795111537, "learning_rate": 2.0634436663082296e-06, "loss": 0.318, "step": 2610 }, { "epoch": 2.1898238747553815, "grad_norm": 0.34508737888032803, "learning_rate": 2.0594943468445634e-06, "loss": 0.288, "step": 2611 }, { "epoch": 2.1906625663964214, "grad_norm": 0.32955931568770735, "learning_rate": 2.0555478299068964e-06, "loss": 0.2739, "step": 2612 }, { "epoch": 2.1915012580374618, "grad_norm": 0.33347261286054836, "learning_rate": 2.051604119256557e-06, "loss": 0.3329, "step": 2613 }, { "epoch": 2.1923399496785017, "grad_norm": 0.33924902674658297, "learning_rate": 2.047663218652206e-06, "loss": 0.3406, "step": 2614 }, { "epoch": 2.1931786413195415, "grad_norm": 0.31444332655151674, "learning_rate": 2.0437251318498174e-06, "loss": 0.2832, "step": 2615 }, { "epoch": 2.1940173329605814, "grad_norm": 0.3637491562830696, "learning_rate": 2.03978986260269e-06, "loss": 0.3115, "step": 2616 }, { "epoch": 2.1948560246016213, "grad_norm": 0.3733273955143929, "learning_rate": 2.0358574146614363e-06, "loss": 0.3002, "step": 2617 }, { "epoch": 2.195694716242661, "grad_norm": 0.3528559454650805, "learning_rate": 2.0319277917739777e-06, "loss": 0.2893, "step": 2618 }, { "epoch": 2.1965334078837015, "grad_norm": 0.3368476748705262, "learning_rate": 2.0280009976855453e-06, "loss": 0.3117, "step": 2619 }, { "epoch": 2.1973720995247414, "grad_norm": 0.36486986076070577, "learning_rate": 2.024077036138674e-06, "loss": 0.3121, "step": 2620 }, { "epoch": 2.1982107911657813, "grad_norm": 0.35213808679192277, "learning_rate": 2.0201559108731954e-06, "loss": 0.2901, "step": 2621 }, { "epoch": 2.199049482806821, "grad_norm": 0.33623247984189575, "learning_rate": 2.0162376256262444e-06, "loss": 0.2423, "step": 2622 }, { "epoch": 2.199888174447861, "grad_norm": 0.36907447567727647, "learning_rate": 2.012322184132241e-06, "loss": 0.3227, "step": 2623 }, { "epoch": 2.2007268660889014, "grad_norm": 0.3455312572069842, "learning_rate": 2.0084095901229023e-06, "loss": 0.2925, "step": 2624 }, { "epoch": 2.2015655577299413, "grad_norm": 0.3361001947028389, "learning_rate": 2.004499847327229e-06, "loss": 0.3208, "step": 2625 }, { "epoch": 2.2024042493709812, "grad_norm": 0.3587681276857935, "learning_rate": 2.0005929594715017e-06, "loss": 0.302, "step": 2626 }, { "epoch": 2.203242941012021, "grad_norm": 0.35453042587240813, "learning_rate": 1.9966889302792834e-06, "loss": 0.3271, "step": 2627 }, { "epoch": 2.204081632653061, "grad_norm": 0.343009744131201, "learning_rate": 1.992787763471413e-06, "loss": 0.3031, "step": 2628 }, { "epoch": 2.2049203242941013, "grad_norm": 0.34164992991703286, "learning_rate": 1.988889462765997e-06, "loss": 0.2786, "step": 2629 }, { "epoch": 2.2057590159351412, "grad_norm": 0.3621807936328808, "learning_rate": 1.9849940318784144e-06, "loss": 0.3125, "step": 2630 }, { "epoch": 2.206597707576181, "grad_norm": 0.3465455466351959, "learning_rate": 1.981101474521308e-06, "loss": 0.2702, "step": 2631 }, { "epoch": 2.207436399217221, "grad_norm": 0.3617494411660968, "learning_rate": 1.9772117944045816e-06, "loss": 0.297, "step": 2632 }, { "epoch": 2.208275090858261, "grad_norm": 0.32635758358843914, "learning_rate": 1.9733249952353976e-06, "loss": 0.2794, "step": 2633 }, { "epoch": 2.2091137824993012, "grad_norm": 0.3294110598371337, "learning_rate": 1.9694410807181697e-06, "loss": 0.3007, "step": 2634 }, { "epoch": 2.209952474140341, "grad_norm": 0.32430555019106705, "learning_rate": 1.9655600545545673e-06, "loss": 0.307, "step": 2635 }, { "epoch": 2.210791165781381, "grad_norm": 0.3472537847741917, "learning_rate": 1.961681920443501e-06, "loss": 0.3014, "step": 2636 }, { "epoch": 2.211629857422421, "grad_norm": 0.35202662469960366, "learning_rate": 1.957806682081129e-06, "loss": 0.3102, "step": 2637 }, { "epoch": 2.212468549063461, "grad_norm": 0.3140050298182589, "learning_rate": 1.953934343160851e-06, "loss": 0.2962, "step": 2638 }, { "epoch": 2.213307240704501, "grad_norm": 0.34840200844040575, "learning_rate": 1.9500649073732985e-06, "loss": 0.3092, "step": 2639 }, { "epoch": 2.214145932345541, "grad_norm": 0.35723687578140184, "learning_rate": 1.9461983784063394e-06, "loss": 0.312, "step": 2640 }, { "epoch": 2.214984623986581, "grad_norm": 0.3145593400338818, "learning_rate": 1.9423347599450728e-06, "loss": 0.2898, "step": 2641 }, { "epoch": 2.215823315627621, "grad_norm": 0.3648394804978379, "learning_rate": 1.938474055671818e-06, "loss": 0.2889, "step": 2642 }, { "epoch": 2.2166620072686607, "grad_norm": 0.3713723922362455, "learning_rate": 1.9346162692661214e-06, "loss": 0.2947, "step": 2643 }, { "epoch": 2.217500698909701, "grad_norm": 0.3378274071547779, "learning_rate": 1.9307614044047483e-06, "loss": 0.3133, "step": 2644 }, { "epoch": 2.218339390550741, "grad_norm": 0.325888710868367, "learning_rate": 1.926909464761679e-06, "loss": 0.2923, "step": 2645 }, { "epoch": 2.219178082191781, "grad_norm": 0.3394939846282075, "learning_rate": 1.9230604540081067e-06, "loss": 0.3282, "step": 2646 }, { "epoch": 2.2200167738328207, "grad_norm": 0.3533046003071188, "learning_rate": 1.919214375812428e-06, "loss": 0.2857, "step": 2647 }, { "epoch": 2.2208554654738606, "grad_norm": 0.35563423531063704, "learning_rate": 1.9153712338402538e-06, "loss": 0.2967, "step": 2648 }, { "epoch": 2.221694157114901, "grad_norm": 0.3411361140767577, "learning_rate": 1.9115310317543856e-06, "loss": 0.2705, "step": 2649 }, { "epoch": 2.222532848755941, "grad_norm": 0.3637685915009607, "learning_rate": 1.9076937732148314e-06, "loss": 0.3013, "step": 2650 }, { "epoch": 2.2233715403969807, "grad_norm": 0.34019679009424064, "learning_rate": 1.9038594618787936e-06, "loss": 0.3305, "step": 2651 }, { "epoch": 2.2242102320380206, "grad_norm": 0.32914048306470595, "learning_rate": 1.9000281014006589e-06, "loss": 0.2678, "step": 2652 }, { "epoch": 2.2250489236790605, "grad_norm": 0.3427644123328107, "learning_rate": 1.8961996954320071e-06, "loss": 0.2954, "step": 2653 }, { "epoch": 2.225887615320101, "grad_norm": 0.3391245428151311, "learning_rate": 1.8923742476216016e-06, "loss": 0.306, "step": 2654 }, { "epoch": 2.2267263069611407, "grad_norm": 0.3342699138243395, "learning_rate": 1.888551761615386e-06, "loss": 0.3034, "step": 2655 }, { "epoch": 2.2275649986021806, "grad_norm": 0.353123370638281, "learning_rate": 1.8847322410564817e-06, "loss": 0.2986, "step": 2656 }, { "epoch": 2.2284036902432205, "grad_norm": 0.3496057122259953, "learning_rate": 1.8809156895851794e-06, "loss": 0.2805, "step": 2657 }, { "epoch": 2.2292423818842604, "grad_norm": 0.3546237452547108, "learning_rate": 1.877102110838946e-06, "loss": 0.2744, "step": 2658 }, { "epoch": 2.2300810735253007, "grad_norm": 0.337310628619916, "learning_rate": 1.8732915084524139e-06, "loss": 0.2743, "step": 2659 }, { "epoch": 2.2309197651663406, "grad_norm": 0.3238850632333124, "learning_rate": 1.8694838860573745e-06, "loss": 0.2753, "step": 2660 }, { "epoch": 2.2317584568073805, "grad_norm": 0.33183272364570976, "learning_rate": 1.8656792472827851e-06, "loss": 0.3021, "step": 2661 }, { "epoch": 2.2325971484484204, "grad_norm": 0.37104379117092884, "learning_rate": 1.861877595754753e-06, "loss": 0.3286, "step": 2662 }, { "epoch": 2.2334358400894603, "grad_norm": 0.3625991154597673, "learning_rate": 1.8580789350965444e-06, "loss": 0.309, "step": 2663 }, { "epoch": 2.2342745317305006, "grad_norm": 0.3479842677602441, "learning_rate": 1.8542832689285733e-06, "loss": 0.2904, "step": 2664 }, { "epoch": 2.2351132233715405, "grad_norm": 0.3443388380470523, "learning_rate": 1.8504906008683954e-06, "loss": 0.3016, "step": 2665 }, { "epoch": 2.2359519150125804, "grad_norm": 0.33527164074150395, "learning_rate": 1.846700934530715e-06, "loss": 0.3031, "step": 2666 }, { "epoch": 2.2367906066536203, "grad_norm": 0.3168981582656245, "learning_rate": 1.842914273527372e-06, "loss": 0.2702, "step": 2667 }, { "epoch": 2.23762929829466, "grad_norm": 0.3392537018237234, "learning_rate": 1.8391306214673431e-06, "loss": 0.3409, "step": 2668 }, { "epoch": 2.2384679899357005, "grad_norm": 0.35230919631241464, "learning_rate": 1.8353499819567393e-06, "loss": 0.3066, "step": 2669 }, { "epoch": 2.2393066815767404, "grad_norm": 0.3628155178477459, "learning_rate": 1.831572358598795e-06, "loss": 0.3002, "step": 2670 }, { "epoch": 2.2401453732177803, "grad_norm": 0.32662948747468057, "learning_rate": 1.8277977549938735e-06, "loss": 0.2757, "step": 2671 }, { "epoch": 2.24098406485882, "grad_norm": 0.34293476014057633, "learning_rate": 1.8240261747394628e-06, "loss": 0.3338, "step": 2672 }, { "epoch": 2.24182275649986, "grad_norm": 0.329254674719966, "learning_rate": 1.820257621430162e-06, "loss": 0.3278, "step": 2673 }, { "epoch": 2.2426614481409004, "grad_norm": 0.3332743840824493, "learning_rate": 1.8164920986576934e-06, "loss": 0.3035, "step": 2674 }, { "epoch": 2.2435001397819403, "grad_norm": 0.35067423020949345, "learning_rate": 1.8127296100108842e-06, "loss": 0.288, "step": 2675 }, { "epoch": 2.24433883142298, "grad_norm": 0.38616395229345984, "learning_rate": 1.8089701590756742e-06, "loss": 0.3319, "step": 2676 }, { "epoch": 2.24517752306402, "grad_norm": 0.3395120869446552, "learning_rate": 1.805213749435108e-06, "loss": 0.3018, "step": 2677 }, { "epoch": 2.24601621470506, "grad_norm": 0.3069342432847019, "learning_rate": 1.8014603846693268e-06, "loss": 0.2862, "step": 2678 }, { "epoch": 2.2468549063461003, "grad_norm": 0.3508849685027491, "learning_rate": 1.797710068355576e-06, "loss": 0.33, "step": 2679 }, { "epoch": 2.24769359798714, "grad_norm": 0.32651219959660477, "learning_rate": 1.7939628040681921e-06, "loss": 0.2768, "step": 2680 }, { "epoch": 2.24853228962818, "grad_norm": 0.3335698361022459, "learning_rate": 1.7902185953786039e-06, "loss": 0.2991, "step": 2681 }, { "epoch": 2.24937098126922, "grad_norm": 0.33974257156768617, "learning_rate": 1.7864774458553292e-06, "loss": 0.2916, "step": 2682 }, { "epoch": 2.25020967291026, "grad_norm": 0.34001506355077343, "learning_rate": 1.7827393590639646e-06, "loss": 0.3109, "step": 2683 }, { "epoch": 2.2510483645513, "grad_norm": 0.35155552919579885, "learning_rate": 1.7790043385671945e-06, "loss": 0.3186, "step": 2684 }, { "epoch": 2.25188705619234, "grad_norm": 0.32805039893652704, "learning_rate": 1.775272387924779e-06, "loss": 0.3003, "step": 2685 }, { "epoch": 2.25272574783338, "grad_norm": 0.33052519491823124, "learning_rate": 1.771543510693549e-06, "loss": 0.2708, "step": 2686 }, { "epoch": 2.25356443947442, "grad_norm": 0.32480429551888346, "learning_rate": 1.7678177104274108e-06, "loss": 0.295, "step": 2687 }, { "epoch": 2.2544031311154598, "grad_norm": 0.36531267690809166, "learning_rate": 1.7640949906773335e-06, "loss": 0.3272, "step": 2688 }, { "epoch": 2.2552418227565, "grad_norm": 0.3080721354172853, "learning_rate": 1.760375354991355e-06, "loss": 0.2747, "step": 2689 }, { "epoch": 2.25608051439754, "grad_norm": 0.33368341474242863, "learning_rate": 1.7566588069145719e-06, "loss": 0.3044, "step": 2690 }, { "epoch": 2.25691920603858, "grad_norm": 0.33381639976425137, "learning_rate": 1.7529453499891346e-06, "loss": 0.2974, "step": 2691 }, { "epoch": 2.2577578976796198, "grad_norm": 0.35037650171436924, "learning_rate": 1.749234987754253e-06, "loss": 0.3034, "step": 2692 }, { "epoch": 2.2585965893206597, "grad_norm": 0.32728849003866234, "learning_rate": 1.7455277237461853e-06, "loss": 0.3077, "step": 2693 }, { "epoch": 2.2594352809617, "grad_norm": 0.3156861374678649, "learning_rate": 1.741823561498236e-06, "loss": 0.2805, "step": 2694 }, { "epoch": 2.26027397260274, "grad_norm": 0.35008767694553533, "learning_rate": 1.7381225045407556e-06, "loss": 0.3257, "step": 2695 }, { "epoch": 2.2611126642437798, "grad_norm": 0.3435468851587439, "learning_rate": 1.7344245564011302e-06, "loss": 0.281, "step": 2696 }, { "epoch": 2.2619513558848197, "grad_norm": 0.32648307771143686, "learning_rate": 1.7307297206037882e-06, "loss": 0.2997, "step": 2697 }, { "epoch": 2.2627900475258595, "grad_norm": 0.32644248387696734, "learning_rate": 1.7270380006701915e-06, "loss": 0.3234, "step": 2698 }, { "epoch": 2.2636287391669, "grad_norm": 0.3262344620716764, "learning_rate": 1.7233494001188266e-06, "loss": 0.2822, "step": 2699 }, { "epoch": 2.2644674308079398, "grad_norm": 0.3367999284407887, "learning_rate": 1.719663922465215e-06, "loss": 0.3307, "step": 2700 }, { "epoch": 2.2653061224489797, "grad_norm": 0.3377854988196073, "learning_rate": 1.7159815712218942e-06, "loss": 0.2813, "step": 2701 }, { "epoch": 2.2661448140900196, "grad_norm": 0.3696864707704049, "learning_rate": 1.7123023498984265e-06, "loss": 0.2909, "step": 2702 }, { "epoch": 2.2669835057310594, "grad_norm": 0.3423547189385562, "learning_rate": 1.7086262620013917e-06, "loss": 0.3343, "step": 2703 }, { "epoch": 2.2678221973720993, "grad_norm": 0.35527961851122686, "learning_rate": 1.7049533110343801e-06, "loss": 0.292, "step": 2704 }, { "epoch": 2.2686608890131397, "grad_norm": 0.33550811141720926, "learning_rate": 1.7012835004979954e-06, "loss": 0.2764, "step": 2705 }, { "epoch": 2.2694995806541796, "grad_norm": 0.32416586484197474, "learning_rate": 1.6976168338898475e-06, "loss": 0.2859, "step": 2706 }, { "epoch": 2.2703382722952195, "grad_norm": 0.337938222467913, "learning_rate": 1.6939533147045462e-06, "loss": 0.3282, "step": 2707 }, { "epoch": 2.2711769639362593, "grad_norm": 0.321491384864321, "learning_rate": 1.690292946433707e-06, "loss": 0.2853, "step": 2708 }, { "epoch": 2.2720156555772992, "grad_norm": 0.32441615281012176, "learning_rate": 1.6866357325659378e-06, "loss": 0.3108, "step": 2709 }, { "epoch": 2.2728543472183396, "grad_norm": 0.3648187387422733, "learning_rate": 1.6829816765868429e-06, "loss": 0.3346, "step": 2710 }, { "epoch": 2.2736930388593795, "grad_norm": 0.31190390538640217, "learning_rate": 1.6793307819790184e-06, "loss": 0.2771, "step": 2711 }, { "epoch": 2.2745317305004193, "grad_norm": 0.32413896447905066, "learning_rate": 1.6756830522220413e-06, "loss": 0.3075, "step": 2712 }, { "epoch": 2.2753704221414592, "grad_norm": 0.324661870345905, "learning_rate": 1.6720384907924797e-06, "loss": 0.283, "step": 2713 }, { "epoch": 2.276209113782499, "grad_norm": 0.38168797176709, "learning_rate": 1.668397101163875e-06, "loss": 0.3219, "step": 2714 }, { "epoch": 2.2770478054235395, "grad_norm": 0.37831223660509244, "learning_rate": 1.6647588868067505e-06, "loss": 0.2969, "step": 2715 }, { "epoch": 2.2778864970645794, "grad_norm": 0.3288564969175175, "learning_rate": 1.661123851188602e-06, "loss": 0.2635, "step": 2716 }, { "epoch": 2.2787251887056192, "grad_norm": 0.3481841761746048, "learning_rate": 1.6574919977738951e-06, "loss": 0.3091, "step": 2717 }, { "epoch": 2.279563880346659, "grad_norm": 0.3222842786130358, "learning_rate": 1.6538633300240637e-06, "loss": 0.2787, "step": 2718 }, { "epoch": 2.280402571987699, "grad_norm": 0.3592156122955325, "learning_rate": 1.6502378513975059e-06, "loss": 0.3236, "step": 2719 }, { "epoch": 2.2812412636287394, "grad_norm": 0.3107162593677761, "learning_rate": 1.6466155653495752e-06, "loss": 0.2743, "step": 2720 }, { "epoch": 2.2820799552697792, "grad_norm": 0.3545081057001975, "learning_rate": 1.642996475332591e-06, "loss": 0.3138, "step": 2721 }, { "epoch": 2.282918646910819, "grad_norm": 0.30647956570881807, "learning_rate": 1.6393805847958177e-06, "loss": 0.266, "step": 2722 }, { "epoch": 2.283757338551859, "grad_norm": 0.3770938853305127, "learning_rate": 1.6357678971854763e-06, "loss": 0.3485, "step": 2723 }, { "epoch": 2.284596030192899, "grad_norm": 0.3057844678790352, "learning_rate": 1.6321584159447346e-06, "loss": 0.2768, "step": 2724 }, { "epoch": 2.285434721833939, "grad_norm": 0.32193468098366623, "learning_rate": 1.6285521445137016e-06, "loss": 0.2806, "step": 2725 }, { "epoch": 2.286273413474979, "grad_norm": 0.36348163795971244, "learning_rate": 1.6249490863294305e-06, "loss": 0.3251, "step": 2726 }, { "epoch": 2.287112105116019, "grad_norm": 0.3440869950742014, "learning_rate": 1.6213492448259073e-06, "loss": 0.3104, "step": 2727 }, { "epoch": 2.287950796757059, "grad_norm": 0.36111235075484943, "learning_rate": 1.6177526234340574e-06, "loss": 0.2867, "step": 2728 }, { "epoch": 2.288789488398099, "grad_norm": 0.3441916979668692, "learning_rate": 1.6141592255817356e-06, "loss": 0.2749, "step": 2729 }, { "epoch": 2.2896281800391387, "grad_norm": 0.3260128055621377, "learning_rate": 1.610569054693723e-06, "loss": 0.2977, "step": 2730 }, { "epoch": 2.290466871680179, "grad_norm": 0.34143914050579527, "learning_rate": 1.6069821141917263e-06, "loss": 0.3006, "step": 2731 }, { "epoch": 2.291305563321219, "grad_norm": 0.3111696826640819, "learning_rate": 1.6033984074943748e-06, "loss": 0.2817, "step": 2732 }, { "epoch": 2.292144254962259, "grad_norm": 0.3368710125515164, "learning_rate": 1.5998179380172113e-06, "loss": 0.3038, "step": 2733 }, { "epoch": 2.2929829466032987, "grad_norm": 0.4318175909664867, "learning_rate": 1.5962407091726988e-06, "loss": 0.2975, "step": 2734 }, { "epoch": 2.2938216382443386, "grad_norm": 0.35193883947023075, "learning_rate": 1.5926667243702066e-06, "loss": 0.319, "step": 2735 }, { "epoch": 2.294660329885379, "grad_norm": 0.3599884567509489, "learning_rate": 1.589095987016015e-06, "loss": 0.2893, "step": 2736 }, { "epoch": 2.295499021526419, "grad_norm": 0.3585647847343112, "learning_rate": 1.5855285005133114e-06, "loss": 0.3432, "step": 2737 }, { "epoch": 2.2963377131674587, "grad_norm": 0.34136976890346116, "learning_rate": 1.5819642682621788e-06, "loss": 0.3083, "step": 2738 }, { "epoch": 2.2971764048084986, "grad_norm": 0.33012968558787265, "learning_rate": 1.578403293659605e-06, "loss": 0.3132, "step": 2739 }, { "epoch": 2.2980150964495385, "grad_norm": 0.3058512599856345, "learning_rate": 1.5748455800994678e-06, "loss": 0.2899, "step": 2740 }, { "epoch": 2.298853788090579, "grad_norm": 0.3255866001212092, "learning_rate": 1.5712911309725405e-06, "loss": 0.3002, "step": 2741 }, { "epoch": 2.2996924797316187, "grad_norm": 0.3160128560203236, "learning_rate": 1.5677399496664836e-06, "loss": 0.3112, "step": 2742 }, { "epoch": 2.3005311713726586, "grad_norm": 0.302873732934724, "learning_rate": 1.5641920395658445e-06, "loss": 0.2511, "step": 2743 }, { "epoch": 2.3013698630136985, "grad_norm": 0.35337144928951464, "learning_rate": 1.5606474040520514e-06, "loss": 0.3193, "step": 2744 }, { "epoch": 2.3022085546547384, "grad_norm": 0.317405037542324, "learning_rate": 1.5571060465034137e-06, "loss": 0.2714, "step": 2745 }, { "epoch": 2.3030472462957787, "grad_norm": 0.33642003160165634, "learning_rate": 1.5535679702951123e-06, "loss": 0.3183, "step": 2746 }, { "epoch": 2.3038859379368186, "grad_norm": 0.36630645579797816, "learning_rate": 1.5500331787992057e-06, "loss": 0.3282, "step": 2747 }, { "epoch": 2.3047246295778585, "grad_norm": 0.33032215965142236, "learning_rate": 1.5465016753846173e-06, "loss": 0.2763, "step": 2748 }, { "epoch": 2.3055633212188984, "grad_norm": 0.3882786503147247, "learning_rate": 1.54297346341714e-06, "loss": 0.3046, "step": 2749 }, { "epoch": 2.3064020128599383, "grad_norm": 0.3177948786736908, "learning_rate": 1.5394485462594311e-06, "loss": 0.2819, "step": 2750 }, { "epoch": 2.3072407045009786, "grad_norm": 0.35985497419793355, "learning_rate": 1.535926927271001e-06, "loss": 0.3022, "step": 2751 }, { "epoch": 2.3080793961420185, "grad_norm": 0.3656516846672747, "learning_rate": 1.5324086098082235e-06, "loss": 0.3006, "step": 2752 }, { "epoch": 2.3089180877830584, "grad_norm": 0.3471782824427607, "learning_rate": 1.528893597224323e-06, "loss": 0.3109, "step": 2753 }, { "epoch": 2.3097567794240983, "grad_norm": 0.3542852111073725, "learning_rate": 1.5253818928693743e-06, "loss": 0.2858, "step": 2754 }, { "epoch": 2.310595471065138, "grad_norm": 0.3391226449855137, "learning_rate": 1.5218735000903007e-06, "loss": 0.2804, "step": 2755 }, { "epoch": 2.3114341627061785, "grad_norm": 0.35064924459502916, "learning_rate": 1.5183684222308653e-06, "loss": 0.3213, "step": 2756 }, { "epoch": 2.3122728543472184, "grad_norm": 0.32227307003046207, "learning_rate": 1.5148666626316755e-06, "loss": 0.3059, "step": 2757 }, { "epoch": 2.3131115459882583, "grad_norm": 0.339545639586852, "learning_rate": 1.511368224630177e-06, "loss": 0.3328, "step": 2758 }, { "epoch": 2.313950237629298, "grad_norm": 0.3427299651275656, "learning_rate": 1.507873111560645e-06, "loss": 0.3073, "step": 2759 }, { "epoch": 2.314788929270338, "grad_norm": 0.32878784377058573, "learning_rate": 1.5043813267541907e-06, "loss": 0.3057, "step": 2760 }, { "epoch": 2.3156276209113784, "grad_norm": 0.324318396632686, "learning_rate": 1.500892873538749e-06, "loss": 0.2914, "step": 2761 }, { "epoch": 2.3164663125524183, "grad_norm": 0.3636637319454061, "learning_rate": 1.4974077552390826e-06, "loss": 0.3041, "step": 2762 }, { "epoch": 2.317305004193458, "grad_norm": 0.3374390724016604, "learning_rate": 1.4939259751767771e-06, "loss": 0.3272, "step": 2763 }, { "epoch": 2.318143695834498, "grad_norm": 0.3182794620318502, "learning_rate": 1.4904475366702303e-06, "loss": 0.2687, "step": 2764 }, { "epoch": 2.318982387475538, "grad_norm": 0.350193242033916, "learning_rate": 1.4869724430346616e-06, "loss": 0.3063, "step": 2765 }, { "epoch": 2.3198210791165783, "grad_norm": 0.32297272017647044, "learning_rate": 1.4835006975820998e-06, "loss": 0.3072, "step": 2766 }, { "epoch": 2.320659770757618, "grad_norm": 0.33180675415127736, "learning_rate": 1.4800323036213825e-06, "loss": 0.3018, "step": 2767 }, { "epoch": 2.321498462398658, "grad_norm": 0.35759212623610637, "learning_rate": 1.4765672644581557e-06, "loss": 0.3116, "step": 2768 }, { "epoch": 2.322337154039698, "grad_norm": 0.319915934903655, "learning_rate": 1.4731055833948626e-06, "loss": 0.2682, "step": 2769 }, { "epoch": 2.323175845680738, "grad_norm": 0.3643513877729648, "learning_rate": 1.4696472637307503e-06, "loss": 0.3401, "step": 2770 }, { "epoch": 2.324014537321778, "grad_norm": 0.34259717917661897, "learning_rate": 1.4661923087618624e-06, "loss": 0.3133, "step": 2771 }, { "epoch": 2.324853228962818, "grad_norm": 0.3101108480980253, "learning_rate": 1.462740721781032e-06, "loss": 0.2872, "step": 2772 }, { "epoch": 2.325691920603858, "grad_norm": 0.36543463050697894, "learning_rate": 1.4592925060778862e-06, "loss": 0.3036, "step": 2773 }, { "epoch": 2.326530612244898, "grad_norm": 0.3296121008938425, "learning_rate": 1.4558476649388359e-06, "loss": 0.316, "step": 2774 }, { "epoch": 2.3273693038859378, "grad_norm": 0.3263556117697169, "learning_rate": 1.4524062016470781e-06, "loss": 0.3265, "step": 2775 }, { "epoch": 2.328207995526978, "grad_norm": 0.3019180561891279, "learning_rate": 1.4489681194825912e-06, "loss": 0.2713, "step": 2776 }, { "epoch": 2.329046687168018, "grad_norm": 0.32455760238444037, "learning_rate": 1.4455334217221263e-06, "loss": 0.2868, "step": 2777 }, { "epoch": 2.329885378809058, "grad_norm": 0.35614719116440063, "learning_rate": 1.4421021116392137e-06, "loss": 0.3237, "step": 2778 }, { "epoch": 2.3307240704500978, "grad_norm": 0.3215816255545798, "learning_rate": 1.4386741925041537e-06, "loss": 0.2775, "step": 2779 }, { "epoch": 2.3315627620911377, "grad_norm": 0.32416926193602985, "learning_rate": 1.4352496675840145e-06, "loss": 0.2738, "step": 2780 }, { "epoch": 2.332401453732178, "grad_norm": 0.3426941368893447, "learning_rate": 1.4318285401426302e-06, "loss": 0.2761, "step": 2781 }, { "epoch": 2.333240145373218, "grad_norm": 0.36589675626116935, "learning_rate": 1.4284108134405938e-06, "loss": 0.3371, "step": 2782 }, { "epoch": 2.334078837014258, "grad_norm": 0.3062176304717733, "learning_rate": 1.4249964907352603e-06, "loss": 0.2853, "step": 2783 }, { "epoch": 2.3349175286552977, "grad_norm": 0.31997979586489433, "learning_rate": 1.4215855752807416e-06, "loss": 0.2985, "step": 2784 }, { "epoch": 2.3357562202963376, "grad_norm": 0.3358111641888204, "learning_rate": 1.4181780703278963e-06, "loss": 0.3069, "step": 2785 }, { "epoch": 2.336594911937378, "grad_norm": 0.31753909055417223, "learning_rate": 1.4147739791243397e-06, "loss": 0.283, "step": 2786 }, { "epoch": 2.337433603578418, "grad_norm": 0.34218533311795424, "learning_rate": 1.4113733049144274e-06, "loss": 0.3472, "step": 2787 }, { "epoch": 2.3382722952194577, "grad_norm": 0.318609989084834, "learning_rate": 1.407976050939262e-06, "loss": 0.2638, "step": 2788 }, { "epoch": 2.3391109868604976, "grad_norm": 0.34489065118718465, "learning_rate": 1.4045822204366878e-06, "loss": 0.3097, "step": 2789 }, { "epoch": 2.3399496785015375, "grad_norm": 0.3439277733022927, "learning_rate": 1.4011918166412797e-06, "loss": 0.3014, "step": 2790 }, { "epoch": 2.340788370142578, "grad_norm": 0.32483213641412423, "learning_rate": 1.3978048427843538e-06, "loss": 0.3094, "step": 2791 }, { "epoch": 2.3416270617836177, "grad_norm": 0.3483673868492762, "learning_rate": 1.3944213020939528e-06, "loss": 0.3019, "step": 2792 }, { "epoch": 2.3424657534246576, "grad_norm": 0.3427464554198812, "learning_rate": 1.3910411977948508e-06, "loss": 0.2929, "step": 2793 }, { "epoch": 2.3433044450656975, "grad_norm": 0.33938021393867873, "learning_rate": 1.3876645331085448e-06, "loss": 0.3095, "step": 2794 }, { "epoch": 2.3441431367067374, "grad_norm": 0.322275280447342, "learning_rate": 1.3842913112532507e-06, "loss": 0.2824, "step": 2795 }, { "epoch": 2.3449818283477777, "grad_norm": 0.3230071633579357, "learning_rate": 1.3809215354439082e-06, "loss": 0.2801, "step": 2796 }, { "epoch": 2.3458205199888176, "grad_norm": 0.36706118254737247, "learning_rate": 1.3775552088921712e-06, "loss": 0.3478, "step": 2797 }, { "epoch": 2.3466592116298575, "grad_norm": 0.3457601967442512, "learning_rate": 1.3741923348064034e-06, "loss": 0.2919, "step": 2798 }, { "epoch": 2.3474979032708974, "grad_norm": 0.3543779975233771, "learning_rate": 1.3708329163916823e-06, "loss": 0.3064, "step": 2799 }, { "epoch": 2.3483365949119372, "grad_norm": 0.3093726183793775, "learning_rate": 1.367476956849787e-06, "loss": 0.2678, "step": 2800 }, { "epoch": 2.3491752865529776, "grad_norm": 0.33418532897088343, "learning_rate": 1.364124459379204e-06, "loss": 0.2745, "step": 2801 }, { "epoch": 2.3500139781940175, "grad_norm": 0.3575733621838256, "learning_rate": 1.3607754271751201e-06, "loss": 0.3435, "step": 2802 }, { "epoch": 2.3508526698350574, "grad_norm": 0.33818642986237296, "learning_rate": 1.3574298634294164e-06, "loss": 0.2757, "step": 2803 }, { "epoch": 2.3516913614760973, "grad_norm": 0.3298979965909829, "learning_rate": 1.354087771330671e-06, "loss": 0.2979, "step": 2804 }, { "epoch": 2.352530053117137, "grad_norm": 0.33339988090864864, "learning_rate": 1.3507491540641537e-06, "loss": 0.3352, "step": 2805 }, { "epoch": 2.3533687447581775, "grad_norm": 0.31621272825524704, "learning_rate": 1.3474140148118186e-06, "loss": 0.3129, "step": 2806 }, { "epoch": 2.3542074363992174, "grad_norm": 0.33869137504199953, "learning_rate": 1.3440823567523104e-06, "loss": 0.3091, "step": 2807 }, { "epoch": 2.3550461280402573, "grad_norm": 0.3461009493533205, "learning_rate": 1.34075418306095e-06, "loss": 0.2632, "step": 2808 }, { "epoch": 2.355884819681297, "grad_norm": 0.35849438831356684, "learning_rate": 1.3374294969097423e-06, "loss": 0.3083, "step": 2809 }, { "epoch": 2.356723511322337, "grad_norm": 0.33041881860795147, "learning_rate": 1.3341083014673678e-06, "loss": 0.2836, "step": 2810 }, { "epoch": 2.3575622029633774, "grad_norm": 0.31771197762007075, "learning_rate": 1.3307905998991766e-06, "loss": 0.302, "step": 2811 }, { "epoch": 2.3584008946044173, "grad_norm": 0.3456832651594193, "learning_rate": 1.3274763953671933e-06, "loss": 0.31, "step": 2812 }, { "epoch": 2.359239586245457, "grad_norm": 0.33552038943199236, "learning_rate": 1.3241656910301043e-06, "loss": 0.3037, "step": 2813 }, { "epoch": 2.360078277886497, "grad_norm": 0.32807201213090625, "learning_rate": 1.3208584900432653e-06, "loss": 0.2996, "step": 2814 }, { "epoch": 2.360916969527537, "grad_norm": 0.3331154966721885, "learning_rate": 1.317554795558691e-06, "loss": 0.3035, "step": 2815 }, { "epoch": 2.3617556611685773, "grad_norm": 0.34059175839647626, "learning_rate": 1.3142546107250536e-06, "loss": 0.2741, "step": 2816 }, { "epoch": 2.362594352809617, "grad_norm": 0.3168896822809175, "learning_rate": 1.3109579386876808e-06, "loss": 0.2768, "step": 2817 }, { "epoch": 2.363433044450657, "grad_norm": 0.35780736253137085, "learning_rate": 1.3076647825885542e-06, "loss": 0.3147, "step": 2818 }, { "epoch": 2.364271736091697, "grad_norm": 0.30883076381665964, "learning_rate": 1.304375145566299e-06, "loss": 0.2683, "step": 2819 }, { "epoch": 2.365110427732737, "grad_norm": 0.3282961110116807, "learning_rate": 1.301089030756193e-06, "loss": 0.303, "step": 2820 }, { "epoch": 2.3659491193737767, "grad_norm": 0.3420280938446541, "learning_rate": 1.2978064412901514e-06, "loss": 0.2967, "step": 2821 }, { "epoch": 2.366787811014817, "grad_norm": 0.33308621385493176, "learning_rate": 1.294527380296734e-06, "loss": 0.2853, "step": 2822 }, { "epoch": 2.367626502655857, "grad_norm": 0.3381480907104251, "learning_rate": 1.2912518509011362e-06, "loss": 0.3031, "step": 2823 }, { "epoch": 2.368465194296897, "grad_norm": 0.3047269036959224, "learning_rate": 1.2879798562251856e-06, "loss": 0.2587, "step": 2824 }, { "epoch": 2.3693038859379367, "grad_norm": 0.35110727812645653, "learning_rate": 1.2847113993873445e-06, "loss": 0.3354, "step": 2825 }, { "epoch": 2.3701425775789766, "grad_norm": 0.3207639013546457, "learning_rate": 1.2814464835026985e-06, "loss": 0.3212, "step": 2826 }, { "epoch": 2.370981269220017, "grad_norm": 0.3316270678639783, "learning_rate": 1.278185111682963e-06, "loss": 0.2921, "step": 2827 }, { "epoch": 2.371819960861057, "grad_norm": 0.34803904199229635, "learning_rate": 1.2749272870364737e-06, "loss": 0.2938, "step": 2828 }, { "epoch": 2.3726586525020967, "grad_norm": 0.3142979750439338, "learning_rate": 1.271673012668186e-06, "loss": 0.2663, "step": 2829 }, { "epoch": 2.3734973441431366, "grad_norm": 0.3775654910784047, "learning_rate": 1.2684222916796712e-06, "loss": 0.3151, "step": 2830 }, { "epoch": 2.3743360357841765, "grad_norm": 0.309494626215959, "learning_rate": 1.265175127169116e-06, "loss": 0.2816, "step": 2831 }, { "epoch": 2.375174727425217, "grad_norm": 0.3540774568860041, "learning_rate": 1.2619315222313121e-06, "loss": 0.3227, "step": 2832 }, { "epoch": 2.3760134190662567, "grad_norm": 0.32841337482216965, "learning_rate": 1.2586914799576654e-06, "loss": 0.312, "step": 2833 }, { "epoch": 2.3768521107072966, "grad_norm": 0.36558270721706476, "learning_rate": 1.2554550034361807e-06, "loss": 0.3088, "step": 2834 }, { "epoch": 2.3776908023483365, "grad_norm": 0.34744316558833416, "learning_rate": 1.2522220957514676e-06, "loss": 0.2854, "step": 2835 }, { "epoch": 2.3785294939893764, "grad_norm": 0.3203355001063661, "learning_rate": 1.2489927599847352e-06, "loss": 0.2928, "step": 2836 }, { "epoch": 2.3793681856304167, "grad_norm": 0.3394420493009664, "learning_rate": 1.245766999213784e-06, "loss": 0.3005, "step": 2837 }, { "epoch": 2.3802068772714566, "grad_norm": 0.3265317695895222, "learning_rate": 1.2425448165130122e-06, "loss": 0.3206, "step": 2838 }, { "epoch": 2.3810455689124965, "grad_norm": 0.33337061014411684, "learning_rate": 1.239326214953403e-06, "loss": 0.3306, "step": 2839 }, { "epoch": 2.3818842605535364, "grad_norm": 0.33189147188897644, "learning_rate": 1.2361111976025303e-06, "loss": 0.317, "step": 2840 }, { "epoch": 2.3827229521945763, "grad_norm": 0.3608368761685837, "learning_rate": 1.2328997675245501e-06, "loss": 0.3106, "step": 2841 }, { "epoch": 2.383561643835616, "grad_norm": 0.34218255905427225, "learning_rate": 1.2296919277802017e-06, "loss": 0.2858, "step": 2842 }, { "epoch": 2.3844003354766565, "grad_norm": 0.36052814481291645, "learning_rate": 1.226487681426799e-06, "loss": 0.2978, "step": 2843 }, { "epoch": 2.3852390271176964, "grad_norm": 0.3499020567387695, "learning_rate": 1.2232870315182354e-06, "loss": 0.3276, "step": 2844 }, { "epoch": 2.3860777187587363, "grad_norm": 0.35433702314624543, "learning_rate": 1.220089981104971e-06, "loss": 0.337, "step": 2845 }, { "epoch": 2.386916410399776, "grad_norm": 0.30423900470163256, "learning_rate": 1.2168965332340422e-06, "loss": 0.2464, "step": 2846 }, { "epoch": 2.387755102040816, "grad_norm": 0.31508394503507003, "learning_rate": 1.2137066909490441e-06, "loss": 0.3199, "step": 2847 }, { "epoch": 2.3885937936818564, "grad_norm": 0.332088802145555, "learning_rate": 1.2105204572901413e-06, "loss": 0.3032, "step": 2848 }, { "epoch": 2.3894324853228963, "grad_norm": 0.32682160599450183, "learning_rate": 1.207337835294059e-06, "loss": 0.2799, "step": 2849 }, { "epoch": 2.390271176963936, "grad_norm": 0.3489785333939202, "learning_rate": 1.2041588279940758e-06, "loss": 0.3247, "step": 2850 }, { "epoch": 2.391109868604976, "grad_norm": 0.34137762667763294, "learning_rate": 1.2009834384200291e-06, "loss": 0.2922, "step": 2851 }, { "epoch": 2.391948560246016, "grad_norm": 0.33375684645974163, "learning_rate": 1.197811669598307e-06, "loss": 0.2921, "step": 2852 }, { "epoch": 2.3927872518870563, "grad_norm": 0.3424935090553464, "learning_rate": 1.1946435245518478e-06, "loss": 0.2985, "step": 2853 }, { "epoch": 2.393625943528096, "grad_norm": 0.35831739455128975, "learning_rate": 1.1914790063001358e-06, "loss": 0.3119, "step": 2854 }, { "epoch": 2.394464635169136, "grad_norm": 0.3291051400544234, "learning_rate": 1.1883181178591952e-06, "loss": 0.2864, "step": 2855 }, { "epoch": 2.395303326810176, "grad_norm": 0.3648992728233546, "learning_rate": 1.185160862241595e-06, "loss": 0.3103, "step": 2856 }, { "epoch": 2.396142018451216, "grad_norm": 0.33299787649689633, "learning_rate": 1.1820072424564426e-06, "loss": 0.2933, "step": 2857 }, { "epoch": 2.396980710092256, "grad_norm": 0.33815796837914774, "learning_rate": 1.178857261509374e-06, "loss": 0.3142, "step": 2858 }, { "epoch": 2.397819401733296, "grad_norm": 0.3232980300081425, "learning_rate": 1.1757109224025654e-06, "loss": 0.3248, "step": 2859 }, { "epoch": 2.398658093374336, "grad_norm": 0.30893886869592047, "learning_rate": 1.1725682281347133e-06, "loss": 0.2873, "step": 2860 }, { "epoch": 2.399496785015376, "grad_norm": 0.3514228816957543, "learning_rate": 1.1694291817010477e-06, "loss": 0.3088, "step": 2861 }, { "epoch": 2.400335476656416, "grad_norm": 0.32385075997953316, "learning_rate": 1.1662937860933198e-06, "loss": 0.2786, "step": 2862 }, { "epoch": 2.401174168297456, "grad_norm": 0.36051610883599694, "learning_rate": 1.163162044299798e-06, "loss": 0.3401, "step": 2863 }, { "epoch": 2.402012859938496, "grad_norm": 0.33237948314969956, "learning_rate": 1.1600339593052723e-06, "loss": 0.2931, "step": 2864 }, { "epoch": 2.402851551579536, "grad_norm": 0.3392233076627164, "learning_rate": 1.1569095340910458e-06, "loss": 0.2712, "step": 2865 }, { "epoch": 2.403690243220576, "grad_norm": 0.323826627439509, "learning_rate": 1.1537887716349339e-06, "loss": 0.3017, "step": 2866 }, { "epoch": 2.4045289348616157, "grad_norm": 0.32441802466656233, "learning_rate": 1.1506716749112628e-06, "loss": 0.3074, "step": 2867 }, { "epoch": 2.405367626502656, "grad_norm": 0.33060937667243384, "learning_rate": 1.1475582468908603e-06, "loss": 0.2998, "step": 2868 }, { "epoch": 2.406206318143696, "grad_norm": 0.34673691835460463, "learning_rate": 1.144448490541062e-06, "loss": 0.325, "step": 2869 }, { "epoch": 2.407045009784736, "grad_norm": 0.3177328595077884, "learning_rate": 1.1413424088257037e-06, "loss": 0.2989, "step": 2870 }, { "epoch": 2.4078837014257757, "grad_norm": 0.32447582233140165, "learning_rate": 1.1382400047051156e-06, "loss": 0.2837, "step": 2871 }, { "epoch": 2.4087223930668156, "grad_norm": 0.33186116836765506, "learning_rate": 1.1351412811361284e-06, "loss": 0.3353, "step": 2872 }, { "epoch": 2.409561084707856, "grad_norm": 0.3166041891425152, "learning_rate": 1.132046241072059e-06, "loss": 0.2875, "step": 2873 }, { "epoch": 2.410399776348896, "grad_norm": 0.34931915120097107, "learning_rate": 1.128954887462717e-06, "loss": 0.2935, "step": 2874 }, { "epoch": 2.4112384679899357, "grad_norm": 0.34227805935200706, "learning_rate": 1.1258672232544005e-06, "loss": 0.3069, "step": 2875 }, { "epoch": 2.4120771596309756, "grad_norm": 0.3300535602468236, "learning_rate": 1.122783251389885e-06, "loss": 0.3055, "step": 2876 }, { "epoch": 2.4129158512720155, "grad_norm": 0.3463444342199106, "learning_rate": 1.1197029748084326e-06, "loss": 0.3376, "step": 2877 }, { "epoch": 2.413754542913056, "grad_norm": 0.3236568303508521, "learning_rate": 1.1166263964457819e-06, "loss": 0.2733, "step": 2878 }, { "epoch": 2.4145932345540957, "grad_norm": 0.35920568915281564, "learning_rate": 1.1135535192341462e-06, "loss": 0.3185, "step": 2879 }, { "epoch": 2.4154319261951356, "grad_norm": 0.3306785274082638, "learning_rate": 1.110484346102213e-06, "loss": 0.2813, "step": 2880 }, { "epoch": 2.4162706178361755, "grad_norm": 0.33533384535455873, "learning_rate": 1.1074188799751363e-06, "loss": 0.2819, "step": 2881 }, { "epoch": 2.4171093094772154, "grad_norm": 0.3327472919833584, "learning_rate": 1.1043571237745387e-06, "loss": 0.2912, "step": 2882 }, { "epoch": 2.4179480011182557, "grad_norm": 0.32856834532049156, "learning_rate": 1.1012990804185103e-06, "loss": 0.319, "step": 2883 }, { "epoch": 2.4187866927592956, "grad_norm": 0.34843009434807665, "learning_rate": 1.0982447528215955e-06, "loss": 0.3279, "step": 2884 }, { "epoch": 2.4196253844003355, "grad_norm": 0.3285150095574981, "learning_rate": 1.095194143894805e-06, "loss": 0.2969, "step": 2885 }, { "epoch": 2.4204640760413754, "grad_norm": 0.32341374951676377, "learning_rate": 1.0921472565455983e-06, "loss": 0.2813, "step": 2886 }, { "epoch": 2.4213027676824153, "grad_norm": 0.36411606018092857, "learning_rate": 1.0891040936778923e-06, "loss": 0.3035, "step": 2887 }, { "epoch": 2.4221414593234556, "grad_norm": 0.3506166257874569, "learning_rate": 1.0860646581920554e-06, "loss": 0.3148, "step": 2888 }, { "epoch": 2.4229801509644955, "grad_norm": 0.3166008466998106, "learning_rate": 1.0830289529848976e-06, "loss": 0.3167, "step": 2889 }, { "epoch": 2.4238188426055354, "grad_norm": 0.3173267429230855, "learning_rate": 1.079996980949679e-06, "loss": 0.2928, "step": 2890 }, { "epoch": 2.4246575342465753, "grad_norm": 0.3215416893713854, "learning_rate": 1.0769687449761008e-06, "loss": 0.2972, "step": 2891 }, { "epoch": 2.425496225887615, "grad_norm": 0.30694408952369057, "learning_rate": 1.073944247950302e-06, "loss": 0.2667, "step": 2892 }, { "epoch": 2.4263349175286555, "grad_norm": 0.35481004797486404, "learning_rate": 1.0709234927548602e-06, "loss": 0.34, "step": 2893 }, { "epoch": 2.4271736091696954, "grad_norm": 0.34710004870693195, "learning_rate": 1.0679064822687824e-06, "loss": 0.2918, "step": 2894 }, { "epoch": 2.4280123008107353, "grad_norm": 0.33108087527789676, "learning_rate": 1.064893219367511e-06, "loss": 0.2908, "step": 2895 }, { "epoch": 2.428850992451775, "grad_norm": 0.32773861808393545, "learning_rate": 1.0618837069229164e-06, "loss": 0.2827, "step": 2896 }, { "epoch": 2.429689684092815, "grad_norm": 0.33015114808584495, "learning_rate": 1.0588779478032918e-06, "loss": 0.2719, "step": 2897 }, { "epoch": 2.4305283757338554, "grad_norm": 0.3649735496603141, "learning_rate": 1.0558759448733557e-06, "loss": 0.3145, "step": 2898 }, { "epoch": 2.4313670673748953, "grad_norm": 0.3318370563177636, "learning_rate": 1.0528777009942442e-06, "loss": 0.2902, "step": 2899 }, { "epoch": 2.432205759015935, "grad_norm": 0.3419170687929194, "learning_rate": 1.0498832190235131e-06, "loss": 0.3059, "step": 2900 }, { "epoch": 2.433044450656975, "grad_norm": 0.3431490569498522, "learning_rate": 1.046892501815132e-06, "loss": 0.3061, "step": 2901 }, { "epoch": 2.433883142298015, "grad_norm": 0.3239937048781704, "learning_rate": 1.0439055522194824e-06, "loss": 0.2733, "step": 2902 }, { "epoch": 2.4347218339390553, "grad_norm": 0.34300958140180793, "learning_rate": 1.040922373083356e-06, "loss": 0.3082, "step": 2903 }, { "epoch": 2.435560525580095, "grad_norm": 0.32597453628306533, "learning_rate": 1.037942967249948e-06, "loss": 0.2866, "step": 2904 }, { "epoch": 2.436399217221135, "grad_norm": 0.3558470725820328, "learning_rate": 1.0349673375588599e-06, "loss": 0.3032, "step": 2905 }, { "epoch": 2.437237908862175, "grad_norm": 0.3273750406579612, "learning_rate": 1.0319954868460946e-06, "loss": 0.2826, "step": 2906 }, { "epoch": 2.438076600503215, "grad_norm": 0.36766592407127885, "learning_rate": 1.0290274179440502e-06, "loss": 0.3723, "step": 2907 }, { "epoch": 2.438915292144255, "grad_norm": 0.298784183401314, "learning_rate": 1.0260631336815236e-06, "loss": 0.2654, "step": 2908 }, { "epoch": 2.439753983785295, "grad_norm": 0.35789895890561846, "learning_rate": 1.0231026368837048e-06, "loss": 0.3326, "step": 2909 }, { "epoch": 2.440592675426335, "grad_norm": 0.33504257132076487, "learning_rate": 1.0201459303721716e-06, "loss": 0.3185, "step": 2910 }, { "epoch": 2.441431367067375, "grad_norm": 0.3334149885513968, "learning_rate": 1.0171930169648913e-06, "loss": 0.2914, "step": 2911 }, { "epoch": 2.4422700587084147, "grad_norm": 0.3595144402714006, "learning_rate": 1.0142438994762143e-06, "loss": 0.3172, "step": 2912 }, { "epoch": 2.443108750349455, "grad_norm": 0.33942098204440474, "learning_rate": 1.0112985807168752e-06, "loss": 0.3065, "step": 2913 }, { "epoch": 2.443947441990495, "grad_norm": 0.31662808494932443, "learning_rate": 1.0083570634939866e-06, "loss": 0.2854, "step": 2914 }, { "epoch": 2.444786133631535, "grad_norm": 0.3417776328663517, "learning_rate": 1.0054193506110394e-06, "loss": 0.3103, "step": 2915 }, { "epoch": 2.4456248252725747, "grad_norm": 0.3182736676004334, "learning_rate": 1.0024854448678988e-06, "loss": 0.2838, "step": 2916 }, { "epoch": 2.4464635169136146, "grad_norm": 0.3694629055283217, "learning_rate": 9.995553490607984e-07, "loss": 0.3205, "step": 2917 }, { "epoch": 2.447302208554655, "grad_norm": 0.3129181685756651, "learning_rate": 9.966290659823435e-07, "loss": 0.2905, "step": 2918 }, { "epoch": 2.448140900195695, "grad_norm": 0.33415754739201936, "learning_rate": 9.937065984215072e-07, "loss": 0.3388, "step": 2919 }, { "epoch": 2.4489795918367347, "grad_norm": 0.31969714207899397, "learning_rate": 9.907879491636202e-07, "loss": 0.2982, "step": 2920 }, { "epoch": 2.4498182834777746, "grad_norm": 0.3459624594455812, "learning_rate": 9.87873120990379e-07, "loss": 0.3214, "step": 2921 }, { "epoch": 2.4506569751188145, "grad_norm": 0.34440929523759084, "learning_rate": 9.84962116679839e-07, "loss": 0.2986, "step": 2922 }, { "epoch": 2.451495666759855, "grad_norm": 0.3095090070222022, "learning_rate": 9.820549390064061e-07, "loss": 0.2777, "step": 2923 }, { "epoch": 2.4523343584008948, "grad_norm": 0.3283287688199234, "learning_rate": 9.79151590740845e-07, "loss": 0.2839, "step": 2924 }, { "epoch": 2.4531730500419346, "grad_norm": 0.35500478157601345, "learning_rate": 9.762520746502645e-07, "loss": 0.3115, "step": 2925 }, { "epoch": 2.4540117416829745, "grad_norm": 0.33242718208367894, "learning_rate": 9.733563934981271e-07, "loss": 0.2972, "step": 2926 }, { "epoch": 2.4548504333240144, "grad_norm": 0.32513228826821594, "learning_rate": 9.704645500442367e-07, "loss": 0.3205, "step": 2927 }, { "epoch": 2.4556891249650548, "grad_norm": 0.3160980362381053, "learning_rate": 9.675765470447413e-07, "loss": 0.2986, "step": 2928 }, { "epoch": 2.4565278166060946, "grad_norm": 0.3382163867308771, "learning_rate": 9.646923872521264e-07, "loss": 0.2974, "step": 2929 }, { "epoch": 2.4573665082471345, "grad_norm": 0.35423451554495583, "learning_rate": 9.618120734152193e-07, "loss": 0.2931, "step": 2930 }, { "epoch": 2.4582051998881744, "grad_norm": 0.342585752132801, "learning_rate": 9.58935608279174e-07, "loss": 0.313, "step": 2931 }, { "epoch": 2.4590438915292143, "grad_norm": 0.3438554980950355, "learning_rate": 9.560629945854855e-07, "loss": 0.2732, "step": 2932 }, { "epoch": 2.459882583170254, "grad_norm": 0.35918039153535714, "learning_rate": 9.531942350719697e-07, "loss": 0.312, "step": 2933 }, { "epoch": 2.4607212748112945, "grad_norm": 0.3199744951458609, "learning_rate": 9.503293324727747e-07, "loss": 0.2933, "step": 2934 }, { "epoch": 2.4615599664523344, "grad_norm": 0.3426891184069273, "learning_rate": 9.474682895183723e-07, "loss": 0.3049, "step": 2935 }, { "epoch": 2.4623986580933743, "grad_norm": 0.328153186621423, "learning_rate": 9.44611108935552e-07, "loss": 0.2724, "step": 2936 }, { "epoch": 2.463237349734414, "grad_norm": 0.3340945664410698, "learning_rate": 9.417577934474276e-07, "loss": 0.3403, "step": 2937 }, { "epoch": 2.464076041375454, "grad_norm": 0.3033500740222756, "learning_rate": 9.389083457734233e-07, "loss": 0.2659, "step": 2938 }, { "epoch": 2.4649147330164944, "grad_norm": 0.32113441970286033, "learning_rate": 9.360627686292805e-07, "loss": 0.2991, "step": 2939 }, { "epoch": 2.4657534246575343, "grad_norm": 0.36333177079152384, "learning_rate": 9.332210647270523e-07, "loss": 0.3137, "step": 2940 }, { "epoch": 2.4665921162985742, "grad_norm": 0.32790772097649323, "learning_rate": 9.303832367750987e-07, "loss": 0.2973, "step": 2941 }, { "epoch": 2.467430807939614, "grad_norm": 0.3403262948200487, "learning_rate": 9.275492874780856e-07, "loss": 0.2864, "step": 2942 }, { "epoch": 2.468269499580654, "grad_norm": 0.3153335901512237, "learning_rate": 9.247192195369842e-07, "loss": 0.289, "step": 2943 }, { "epoch": 2.4691081912216943, "grad_norm": 0.3305201367730947, "learning_rate": 9.218930356490625e-07, "loss": 0.2986, "step": 2944 }, { "epoch": 2.4699468828627342, "grad_norm": 0.3286756533471463, "learning_rate": 9.190707385078917e-07, "loss": 0.2935, "step": 2945 }, { "epoch": 2.470785574503774, "grad_norm": 0.3153578609034384, "learning_rate": 9.162523308033334e-07, "loss": 0.3033, "step": 2946 }, { "epoch": 2.471624266144814, "grad_norm": 0.35194089330160094, "learning_rate": 9.134378152215462e-07, "loss": 0.2902, "step": 2947 }, { "epoch": 2.472462957785854, "grad_norm": 0.3156540954983935, "learning_rate": 9.10627194444979e-07, "loss": 0.2851, "step": 2948 }, { "epoch": 2.4733016494268942, "grad_norm": 0.3227278355677203, "learning_rate": 9.078204711523658e-07, "loss": 0.2932, "step": 2949 }, { "epoch": 2.474140341067934, "grad_norm": 0.36577716754143336, "learning_rate": 9.050176480187284e-07, "loss": 0.317, "step": 2950 }, { "epoch": 2.474979032708974, "grad_norm": 0.33477897116828265, "learning_rate": 9.022187277153716e-07, "loss": 0.2847, "step": 2951 }, { "epoch": 2.475817724350014, "grad_norm": 0.3482968519236947, "learning_rate": 8.994237129098787e-07, "loss": 0.3085, "step": 2952 }, { "epoch": 2.476656415991054, "grad_norm": 0.3528558953207895, "learning_rate": 8.966326062661134e-07, "loss": 0.2962, "step": 2953 }, { "epoch": 2.4774951076320937, "grad_norm": 0.32647041413337946, "learning_rate": 8.938454104442118e-07, "loss": 0.2976, "step": 2954 }, { "epoch": 2.478333799273134, "grad_norm": 0.305724743767305, "learning_rate": 8.910621281005832e-07, "loss": 0.2802, "step": 2955 }, { "epoch": 2.479172490914174, "grad_norm": 0.322712663835685, "learning_rate": 8.882827618879108e-07, "loss": 0.2839, "step": 2956 }, { "epoch": 2.480011182555214, "grad_norm": 0.33759762721051945, "learning_rate": 8.855073144551391e-07, "loss": 0.3279, "step": 2957 }, { "epoch": 2.4808498741962537, "grad_norm": 0.3230867802255492, "learning_rate": 8.82735788447483e-07, "loss": 0.2549, "step": 2958 }, { "epoch": 2.4816885658372936, "grad_norm": 0.3385477525213482, "learning_rate": 8.799681865064169e-07, "loss": 0.2947, "step": 2959 }, { "epoch": 2.482527257478334, "grad_norm": 0.3173500750099291, "learning_rate": 8.772045112696764e-07, "loss": 0.3124, "step": 2960 }, { "epoch": 2.483365949119374, "grad_norm": 0.3521398180880395, "learning_rate": 8.744447653712562e-07, "loss": 0.3202, "step": 2961 }, { "epoch": 2.4842046407604137, "grad_norm": 0.3303595526236698, "learning_rate": 8.716889514414023e-07, "loss": 0.2696, "step": 2962 }, { "epoch": 2.4850433324014536, "grad_norm": 0.32738099985054303, "learning_rate": 8.689370721066171e-07, "loss": 0.3006, "step": 2963 }, { "epoch": 2.4858820240424935, "grad_norm": 0.3361528756484867, "learning_rate": 8.661891299896497e-07, "loss": 0.2872, "step": 2964 }, { "epoch": 2.486720715683534, "grad_norm": 0.3582913183286533, "learning_rate": 8.634451277095002e-07, "loss": 0.3352, "step": 2965 }, { "epoch": 2.4875594073245737, "grad_norm": 0.3542763620284574, "learning_rate": 8.607050678814121e-07, "loss": 0.3246, "step": 2966 }, { "epoch": 2.4883980989656136, "grad_norm": 0.36523709162774537, "learning_rate": 8.579689531168689e-07, "loss": 0.314, "step": 2967 }, { "epoch": 2.4892367906066535, "grad_norm": 0.3025790794944582, "learning_rate": 8.552367860235977e-07, "loss": 0.2527, "step": 2968 }, { "epoch": 2.4900754822476934, "grad_norm": 0.34312829011331797, "learning_rate": 8.525085692055635e-07, "loss": 0.3438, "step": 2969 }, { "epoch": 2.4909141738887337, "grad_norm": 0.34818454916255526, "learning_rate": 8.497843052629623e-07, "loss": 0.3127, "step": 2970 }, { "epoch": 2.4917528655297736, "grad_norm": 0.34870485889253483, "learning_rate": 8.470639967922279e-07, "loss": 0.3263, "step": 2971 }, { "epoch": 2.4925915571708135, "grad_norm": 0.33441572766493527, "learning_rate": 8.44347646386019e-07, "loss": 0.2833, "step": 2972 }, { "epoch": 2.4934302488118534, "grad_norm": 0.3215788553249824, "learning_rate": 8.416352566332253e-07, "loss": 0.2856, "step": 2973 }, { "epoch": 2.4942689404528933, "grad_norm": 0.32930463118726966, "learning_rate": 8.38926830118963e-07, "loss": 0.3127, "step": 2974 }, { "epoch": 2.4951076320939336, "grad_norm": 0.32027566469053625, "learning_rate": 8.362223694245658e-07, "loss": 0.2866, "step": 2975 }, { "epoch": 2.4959463237349735, "grad_norm": 0.335695769675163, "learning_rate": 8.335218771275922e-07, "loss": 0.3198, "step": 2976 }, { "epoch": 2.4967850153760134, "grad_norm": 0.3117080041498759, "learning_rate": 8.308253558018181e-07, "loss": 0.2576, "step": 2977 }, { "epoch": 2.4976237070170533, "grad_norm": 0.35511684752129496, "learning_rate": 8.281328080172324e-07, "loss": 0.3272, "step": 2978 }, { "epoch": 2.498462398658093, "grad_norm": 0.31663156025172995, "learning_rate": 8.254442363400411e-07, "loss": 0.2841, "step": 2979 }, { "epoch": 2.4993010902991335, "grad_norm": 0.3270790530157082, "learning_rate": 8.22759643332654e-07, "loss": 0.3154, "step": 2980 }, { "epoch": 2.5001397819401734, "grad_norm": 0.32154000167427077, "learning_rate": 8.200790315536955e-07, "loss": 0.2965, "step": 2981 }, { "epoch": 2.5009784735812133, "grad_norm": 0.3477536778255259, "learning_rate": 8.174024035579925e-07, "loss": 0.2981, "step": 2982 }, { "epoch": 2.501817165222253, "grad_norm": 0.366095516278223, "learning_rate": 8.147297618965744e-07, "loss": 0.3052, "step": 2983 }, { "epoch": 2.502655856863293, "grad_norm": 0.3205846514412879, "learning_rate": 8.120611091166746e-07, "loss": 0.2677, "step": 2984 }, { "epoch": 2.5034945485043334, "grad_norm": 0.34478488177235356, "learning_rate": 8.093964477617194e-07, "loss": 0.3068, "step": 2985 }, { "epoch": 2.5043332401453733, "grad_norm": 0.33841814668120784, "learning_rate": 8.067357803713366e-07, "loss": 0.2821, "step": 2986 }, { "epoch": 2.505171931786413, "grad_norm": 0.3013321639243771, "learning_rate": 8.04079109481345e-07, "loss": 0.28, "step": 2987 }, { "epoch": 2.506010623427453, "grad_norm": 0.3213905531980109, "learning_rate": 8.01426437623753e-07, "loss": 0.3047, "step": 2988 }, { "epoch": 2.506849315068493, "grad_norm": 0.30490437952080796, "learning_rate": 7.987777673267594e-07, "loss": 0.2778, "step": 2989 }, { "epoch": 2.5076880067095333, "grad_norm": 0.33395869022786606, "learning_rate": 7.961331011147493e-07, "loss": 0.3179, "step": 2990 }, { "epoch": 2.508526698350573, "grad_norm": 0.31770139615867204, "learning_rate": 7.934924415082911e-07, "loss": 0.3011, "step": 2991 }, { "epoch": 2.509365389991613, "grad_norm": 0.3208756589663866, "learning_rate": 7.90855791024136e-07, "loss": 0.2772, "step": 2992 }, { "epoch": 2.510204081632653, "grad_norm": 0.3260209247344885, "learning_rate": 7.882231521752104e-07, "loss": 0.2955, "step": 2993 }, { "epoch": 2.511042773273693, "grad_norm": 0.3166895086028902, "learning_rate": 7.855945274706206e-07, "loss": 0.2962, "step": 2994 }, { "epoch": 2.511881464914733, "grad_norm": 0.33159491741103936, "learning_rate": 7.829699194156482e-07, "loss": 0.3085, "step": 2995 }, { "epoch": 2.512720156555773, "grad_norm": 0.32165832545042333, "learning_rate": 7.803493305117421e-07, "loss": 0.2882, "step": 2996 }, { "epoch": 2.513558848196813, "grad_norm": 0.3446792431496618, "learning_rate": 7.777327632565251e-07, "loss": 0.2956, "step": 2997 }, { "epoch": 2.514397539837853, "grad_norm": 0.3635604813619778, "learning_rate": 7.75120220143783e-07, "loss": 0.3013, "step": 2998 }, { "epoch": 2.5152362314788927, "grad_norm": 0.3438006525764656, "learning_rate": 7.725117036634705e-07, "loss": 0.3237, "step": 2999 }, { "epoch": 2.516074923119933, "grad_norm": 0.31319089738771055, "learning_rate": 7.699072163017013e-07, "loss": 0.267, "step": 3000 }, { "epoch": 2.516913614760973, "grad_norm": 0.34980095986514004, "learning_rate": 7.673067605407513e-07, "loss": 0.3102, "step": 3001 }, { "epoch": 2.517752306402013, "grad_norm": 0.32793745696187576, "learning_rate": 7.647103388590543e-07, "loss": 0.3019, "step": 3002 }, { "epoch": 2.5185909980430528, "grad_norm": 0.3396749810705954, "learning_rate": 7.621179537311951e-07, "loss": 0.2862, "step": 3003 }, { "epoch": 2.5194296896840926, "grad_norm": 0.349086462421962, "learning_rate": 7.595296076279157e-07, "loss": 0.3084, "step": 3004 }, { "epoch": 2.520268381325133, "grad_norm": 0.33250419552202903, "learning_rate": 7.569453030161089e-07, "loss": 0.2746, "step": 3005 }, { "epoch": 2.521107072966173, "grad_norm": 0.32919909881703885, "learning_rate": 7.543650423588106e-07, "loss": 0.3038, "step": 3006 }, { "epoch": 2.5219457646072128, "grad_norm": 0.3123327821879745, "learning_rate": 7.517888281152075e-07, "loss": 0.2765, "step": 3007 }, { "epoch": 2.5227844562482526, "grad_norm": 0.376487388152636, "learning_rate": 7.4921666274063e-07, "loss": 0.3465, "step": 3008 }, { "epoch": 2.5236231478892925, "grad_norm": 0.30704317578214774, "learning_rate": 7.466485486865444e-07, "loss": 0.2588, "step": 3009 }, { "epoch": 2.524461839530333, "grad_norm": 0.3341104122576937, "learning_rate": 7.440844884005616e-07, "loss": 0.3566, "step": 3010 }, { "epoch": 2.5253005311713728, "grad_norm": 0.3352610978834892, "learning_rate": 7.415244843264247e-07, "loss": 0.3061, "step": 3011 }, { "epoch": 2.5261392228124127, "grad_norm": 0.35351623579138725, "learning_rate": 7.38968538904013e-07, "loss": 0.2959, "step": 3012 }, { "epoch": 2.5269779144534525, "grad_norm": 0.3249830246036241, "learning_rate": 7.36416654569338e-07, "loss": 0.2679, "step": 3013 }, { "epoch": 2.5278166060944924, "grad_norm": 0.3217688992502253, "learning_rate": 7.338688337545402e-07, "loss": 0.2861, "step": 3014 }, { "epoch": 2.5286552977355328, "grad_norm": 0.32960484977745524, "learning_rate": 7.313250788878873e-07, "loss": 0.3117, "step": 3015 }, { "epoch": 2.5294939893765727, "grad_norm": 0.3440190451390263, "learning_rate": 7.287853923937699e-07, "loss": 0.2919, "step": 3016 }, { "epoch": 2.5303326810176126, "grad_norm": 0.3227819474934429, "learning_rate": 7.26249776692704e-07, "loss": 0.281, "step": 3017 }, { "epoch": 2.5311713726586524, "grad_norm": 0.3269982264558969, "learning_rate": 7.237182342013249e-07, "loss": 0.3046, "step": 3018 }, { "epoch": 2.5320100642996923, "grad_norm": 0.3608271461939514, "learning_rate": 7.211907673323842e-07, "loss": 0.3625, "step": 3019 }, { "epoch": 2.5328487559407327, "grad_norm": 0.3088090918670581, "learning_rate": 7.186673784947512e-07, "loss": 0.2718, "step": 3020 }, { "epoch": 2.5336874475817726, "grad_norm": 0.3440657480748665, "learning_rate": 7.161480700934092e-07, "loss": 0.3148, "step": 3021 }, { "epoch": 2.5345261392228124, "grad_norm": 0.32783964743222954, "learning_rate": 7.136328445294483e-07, "loss": 0.2994, "step": 3022 }, { "epoch": 2.5353648308638523, "grad_norm": 0.33933857296342335, "learning_rate": 7.111217042000729e-07, "loss": 0.3149, "step": 3023 }, { "epoch": 2.5362035225048922, "grad_norm": 0.3380893173188873, "learning_rate": 7.086146514985881e-07, "loss": 0.2924, "step": 3024 }, { "epoch": 2.5370422141459326, "grad_norm": 0.3289097420061372, "learning_rate": 7.061116888144087e-07, "loss": 0.3103, "step": 3025 }, { "epoch": 2.5378809057869725, "grad_norm": 0.3024656719949479, "learning_rate": 7.036128185330476e-07, "loss": 0.3063, "step": 3026 }, { "epoch": 2.5387195974280123, "grad_norm": 0.3445386762786815, "learning_rate": 7.011180430361198e-07, "loss": 0.308, "step": 3027 }, { "epoch": 2.5395582890690522, "grad_norm": 0.3466872643204685, "learning_rate": 6.986273647013364e-07, "loss": 0.2991, "step": 3028 }, { "epoch": 2.540396980710092, "grad_norm": 0.3115539495463966, "learning_rate": 6.961407859025021e-07, "loss": 0.301, "step": 3029 }, { "epoch": 2.5412356723511325, "grad_norm": 0.3235963245277268, "learning_rate": 6.936583090095172e-07, "loss": 0.2683, "step": 3030 }, { "epoch": 2.5420743639921723, "grad_norm": 0.35716473919361313, "learning_rate": 6.91179936388372e-07, "loss": 0.3348, "step": 3031 }, { "epoch": 2.5429130556332122, "grad_norm": 0.30722079134059405, "learning_rate": 6.88705670401143e-07, "loss": 0.2814, "step": 3032 }, { "epoch": 2.543751747274252, "grad_norm": 0.32874737018149014, "learning_rate": 6.862355134059945e-07, "loss": 0.2973, "step": 3033 }, { "epoch": 2.544590438915292, "grad_norm": 0.32146177434137413, "learning_rate": 6.837694677571765e-07, "loss": 0.3114, "step": 3034 }, { "epoch": 2.5454291305563324, "grad_norm": 0.3192798468538854, "learning_rate": 6.813075358050153e-07, "loss": 0.2805, "step": 3035 }, { "epoch": 2.5462678221973722, "grad_norm": 0.3380969224033969, "learning_rate": 6.788497198959226e-07, "loss": 0.3326, "step": 3036 }, { "epoch": 2.547106513838412, "grad_norm": 0.2821413802729324, "learning_rate": 6.763960223723815e-07, "loss": 0.2548, "step": 3037 }, { "epoch": 2.547945205479452, "grad_norm": 0.3433660828324825, "learning_rate": 6.739464455729544e-07, "loss": 0.3172, "step": 3038 }, { "epoch": 2.548783897120492, "grad_norm": 0.3465071611043342, "learning_rate": 6.71500991832274e-07, "loss": 0.3049, "step": 3039 }, { "epoch": 2.5496225887615322, "grad_norm": 0.35121674241299344, "learning_rate": 6.690596634810448e-07, "loss": 0.3257, "step": 3040 }, { "epoch": 2.550461280402572, "grad_norm": 0.3284520436630772, "learning_rate": 6.666224628460393e-07, "loss": 0.2849, "step": 3041 }, { "epoch": 2.551299972043612, "grad_norm": 0.32174424732398843, "learning_rate": 6.641893922500942e-07, "loss": 0.3304, "step": 3042 }, { "epoch": 2.552138663684652, "grad_norm": 0.33322152837903424, "learning_rate": 6.617604540121108e-07, "loss": 0.3028, "step": 3043 }, { "epoch": 2.552977355325692, "grad_norm": 0.30293720061387863, "learning_rate": 6.593356504470549e-07, "loss": 0.3148, "step": 3044 }, { "epoch": 2.553816046966732, "grad_norm": 0.3090403037934982, "learning_rate": 6.569149838659461e-07, "loss": 0.2937, "step": 3045 }, { "epoch": 2.554654738607772, "grad_norm": 0.33113042128247905, "learning_rate": 6.544984565758656e-07, "loss": 0.2898, "step": 3046 }, { "epoch": 2.555493430248812, "grad_norm": 0.33752591572781665, "learning_rate": 6.520860708799487e-07, "loss": 0.3273, "step": 3047 }, { "epoch": 2.556332121889852, "grad_norm": 0.3061671832244996, "learning_rate": 6.496778290773814e-07, "loss": 0.2692, "step": 3048 }, { "epoch": 2.5571708135308917, "grad_norm": 0.31138218854355043, "learning_rate": 6.472737334634021e-07, "loss": 0.2809, "step": 3049 }, { "epoch": 2.558009505171932, "grad_norm": 0.30306859039497197, "learning_rate": 6.448737863292976e-07, "loss": 0.2859, "step": 3050 }, { "epoch": 2.5588481968129715, "grad_norm": 0.32439986849324093, "learning_rate": 6.424779899624001e-07, "loss": 0.3133, "step": 3051 }, { "epoch": 2.559686888454012, "grad_norm": 0.3276651395501673, "learning_rate": 6.400863466460871e-07, "loss": 0.2913, "step": 3052 }, { "epoch": 2.5605255800950517, "grad_norm": 0.3338628321701344, "learning_rate": 6.376988586597754e-07, "loss": 0.3005, "step": 3053 }, { "epoch": 2.5613642717360916, "grad_norm": 0.3514256091522344, "learning_rate": 6.353155282789248e-07, "loss": 0.3199, "step": 3054 }, { "epoch": 2.562202963377132, "grad_norm": 0.33209228809972113, "learning_rate": 6.329363577750286e-07, "loss": 0.3107, "step": 3055 }, { "epoch": 2.5630416550181714, "grad_norm": 0.327594963794245, "learning_rate": 6.305613494156182e-07, "loss": 0.312, "step": 3056 }, { "epoch": 2.5638803466592117, "grad_norm": 0.31537844141184224, "learning_rate": 6.281905054642601e-07, "loss": 0.2923, "step": 3057 }, { "epoch": 2.5647190383002516, "grad_norm": 0.32413811006370274, "learning_rate": 6.258238281805456e-07, "loss": 0.2969, "step": 3058 }, { "epoch": 2.5655577299412915, "grad_norm": 0.3460113444206311, "learning_rate": 6.234613198201006e-07, "loss": 0.3231, "step": 3059 }, { "epoch": 2.566396421582332, "grad_norm": 0.31121095593094156, "learning_rate": 6.21102982634576e-07, "loss": 0.247, "step": 3060 }, { "epoch": 2.5672351132233713, "grad_norm": 0.32174256257038053, "learning_rate": 6.187488188716457e-07, "loss": 0.3178, "step": 3061 }, { "epoch": 2.5680738048644116, "grad_norm": 0.31125310880363816, "learning_rate": 6.163988307750068e-07, "loss": 0.306, "step": 3062 }, { "epoch": 2.5689124965054515, "grad_norm": 0.3328571734124269, "learning_rate": 6.140530205843786e-07, "loss": 0.2891, "step": 3063 }, { "epoch": 2.5697511881464914, "grad_norm": 0.36466988708829745, "learning_rate": 6.11711390535496e-07, "loss": 0.2888, "step": 3064 }, { "epoch": 2.5705898797875313, "grad_norm": 0.3536153337705102, "learning_rate": 6.093739428601126e-07, "loss": 0.2929, "step": 3065 }, { "epoch": 2.571428571428571, "grad_norm": 0.3220171909186335, "learning_rate": 6.070406797859912e-07, "loss": 0.2884, "step": 3066 }, { "epoch": 2.5722672630696115, "grad_norm": 0.3453371846436836, "learning_rate": 6.047116035369111e-07, "loss": 0.3153, "step": 3067 }, { "epoch": 2.5731059547106514, "grad_norm": 0.3326952363691263, "learning_rate": 6.023867163326603e-07, "loss": 0.2952, "step": 3068 }, { "epoch": 2.5739446463516913, "grad_norm": 0.3286783580299663, "learning_rate": 6.000660203890313e-07, "loss": 0.2825, "step": 3069 }, { "epoch": 2.574783337992731, "grad_norm": 0.35002768405078416, "learning_rate": 5.977495179178266e-07, "loss": 0.3342, "step": 3070 }, { "epoch": 2.575622029633771, "grad_norm": 0.3219582701151339, "learning_rate": 5.954372111268475e-07, "loss": 0.2785, "step": 3071 }, { "epoch": 2.5764607212748114, "grad_norm": 0.37183133543663655, "learning_rate": 5.931291022198993e-07, "loss": 0.311, "step": 3072 }, { "epoch": 2.5772994129158513, "grad_norm": 0.3109123776420076, "learning_rate": 5.908251933967873e-07, "loss": 0.2864, "step": 3073 }, { "epoch": 2.578138104556891, "grad_norm": 0.35307347998113925, "learning_rate": 5.885254868533096e-07, "loss": 0.308, "step": 3074 }, { "epoch": 2.578976796197931, "grad_norm": 0.33977224260248357, "learning_rate": 5.862299847812625e-07, "loss": 0.2996, "step": 3075 }, { "epoch": 2.579815487838971, "grad_norm": 0.3210457349661455, "learning_rate": 5.839386893684357e-07, "loss": 0.2954, "step": 3076 }, { "epoch": 2.5806541794800113, "grad_norm": 0.3276656504161176, "learning_rate": 5.816516027986069e-07, "loss": 0.3056, "step": 3077 }, { "epoch": 2.581492871121051, "grad_norm": 0.2892080310440699, "learning_rate": 5.793687272515463e-07, "loss": 0.2729, "step": 3078 }, { "epoch": 2.582331562762091, "grad_norm": 0.3046975154217273, "learning_rate": 5.770900649030053e-07, "loss": 0.2976, "step": 3079 }, { "epoch": 2.583170254403131, "grad_norm": 0.3349900510902511, "learning_rate": 5.748156179247239e-07, "loss": 0.2948, "step": 3080 }, { "epoch": 2.584008946044171, "grad_norm": 0.3478433578558413, "learning_rate": 5.725453884844245e-07, "loss": 0.3247, "step": 3081 }, { "epoch": 2.584847637685211, "grad_norm": 0.322456349227591, "learning_rate": 5.702793787458072e-07, "loss": 0.2705, "step": 3082 }, { "epoch": 2.585686329326251, "grad_norm": 0.33369053479405464, "learning_rate": 5.680175908685537e-07, "loss": 0.2915, "step": 3083 }, { "epoch": 2.586525020967291, "grad_norm": 0.32682608994819745, "learning_rate": 5.65760027008318e-07, "loss": 0.2992, "step": 3084 }, { "epoch": 2.587363712608331, "grad_norm": 0.35620156715374746, "learning_rate": 5.635066893167318e-07, "loss": 0.3054, "step": 3085 }, { "epoch": 2.5882024042493708, "grad_norm": 0.33326518709443154, "learning_rate": 5.612575799413989e-07, "loss": 0.2914, "step": 3086 }, { "epoch": 2.589041095890411, "grad_norm": 0.34477682665976755, "learning_rate": 5.590127010258889e-07, "loss": 0.3324, "step": 3087 }, { "epoch": 2.589879787531451, "grad_norm": 0.31549827424448373, "learning_rate": 5.56772054709745e-07, "loss": 0.2654, "step": 3088 }, { "epoch": 2.590718479172491, "grad_norm": 0.3458171906414895, "learning_rate": 5.545356431284726e-07, "loss": 0.3161, "step": 3089 }, { "epoch": 2.5915571708135308, "grad_norm": 0.32651621184494606, "learning_rate": 5.523034684135431e-07, "loss": 0.2831, "step": 3090 }, { "epoch": 2.5923958624545707, "grad_norm": 0.32759375913531025, "learning_rate": 5.500755326923901e-07, "loss": 0.2883, "step": 3091 }, { "epoch": 2.593234554095611, "grad_norm": 0.33953145454102024, "learning_rate": 5.478518380884035e-07, "loss": 0.3135, "step": 3092 }, { "epoch": 2.594073245736651, "grad_norm": 0.3310177803644698, "learning_rate": 5.456323867209357e-07, "loss": 0.3022, "step": 3093 }, { "epoch": 2.5949119373776908, "grad_norm": 0.3098885850931667, "learning_rate": 5.434171807052934e-07, "loss": 0.2739, "step": 3094 }, { "epoch": 2.5957506290187307, "grad_norm": 0.3493683693988168, "learning_rate": 5.412062221527348e-07, "loss": 0.3144, "step": 3095 }, { "epoch": 2.5965893206597706, "grad_norm": 0.3058117733461192, "learning_rate": 5.389995131704739e-07, "loss": 0.2636, "step": 3096 }, { "epoch": 2.597428012300811, "grad_norm": 0.3094807913970245, "learning_rate": 5.367970558616708e-07, "loss": 0.29, "step": 3097 }, { "epoch": 2.5982667039418508, "grad_norm": 0.3452962524346108, "learning_rate": 5.345988523254359e-07, "loss": 0.3351, "step": 3098 }, { "epoch": 2.5991053955828907, "grad_norm": 0.345411628060575, "learning_rate": 5.324049046568252e-07, "loss": 0.2936, "step": 3099 }, { "epoch": 2.5999440872239306, "grad_norm": 0.3134398212234614, "learning_rate": 5.30215214946837e-07, "loss": 0.2888, "step": 3100 }, { "epoch": 2.6007827788649704, "grad_norm": 0.32879724545585537, "learning_rate": 5.280297852824151e-07, "loss": 0.3242, "step": 3101 }, { "epoch": 2.601621470506011, "grad_norm": 0.310416968432378, "learning_rate": 5.258486177464367e-07, "loss": 0.2742, "step": 3102 }, { "epoch": 2.6024601621470507, "grad_norm": 0.3335044097765937, "learning_rate": 5.236717144177234e-07, "loss": 0.2953, "step": 3103 }, { "epoch": 2.6032988537880906, "grad_norm": 0.33417439872703186, "learning_rate": 5.214990773710294e-07, "loss": 0.2802, "step": 3104 }, { "epoch": 2.6041375454291305, "grad_norm": 0.3522085176194349, "learning_rate": 5.193307086770427e-07, "loss": 0.3139, "step": 3105 }, { "epoch": 2.6049762370701703, "grad_norm": 0.33772935216794225, "learning_rate": 5.171666104023837e-07, "loss": 0.3324, "step": 3106 }, { "epoch": 2.6058149287112107, "grad_norm": 0.31681904339296685, "learning_rate": 5.150067846096051e-07, "loss": 0.3067, "step": 3107 }, { "epoch": 2.6066536203522506, "grad_norm": 0.30489436032419814, "learning_rate": 5.128512333571833e-07, "loss": 0.291, "step": 3108 }, { "epoch": 2.6074923119932905, "grad_norm": 0.3109857261355615, "learning_rate": 5.106999586995248e-07, "loss": 0.2669, "step": 3109 }, { "epoch": 2.6083310036343303, "grad_norm": 0.3510593142487789, "learning_rate": 5.085529626869556e-07, "loss": 0.3263, "step": 3110 }, { "epoch": 2.6091696952753702, "grad_norm": 0.3353876018344034, "learning_rate": 5.064102473657284e-07, "loss": 0.2612, "step": 3111 }, { "epoch": 2.6100083869164106, "grad_norm": 0.35247309321451514, "learning_rate": 5.04271814778014e-07, "loss": 0.2907, "step": 3112 }, { "epoch": 2.6108470785574505, "grad_norm": 0.3421131933703011, "learning_rate": 5.021376669619016e-07, "loss": 0.2933, "step": 3113 }, { "epoch": 2.6116857701984904, "grad_norm": 0.3480563447243034, "learning_rate": 5.00007805951398e-07, "loss": 0.3498, "step": 3114 }, { "epoch": 2.6125244618395302, "grad_norm": 0.3499740067093459, "learning_rate": 4.978822337764205e-07, "loss": 0.3106, "step": 3115 }, { "epoch": 2.61336315348057, "grad_norm": 0.3230082667005654, "learning_rate": 4.957609524628026e-07, "loss": 0.2884, "step": 3116 }, { "epoch": 2.6142018451216105, "grad_norm": 0.31308754395936944, "learning_rate": 4.936439640322882e-07, "loss": 0.2956, "step": 3117 }, { "epoch": 2.6150405367626504, "grad_norm": 0.35681289437206265, "learning_rate": 4.915312705025266e-07, "loss": 0.3031, "step": 3118 }, { "epoch": 2.6158792284036902, "grad_norm": 0.3432093304267557, "learning_rate": 4.894228738870765e-07, "loss": 0.3093, "step": 3119 }, { "epoch": 2.61671792004473, "grad_norm": 0.3236010934035196, "learning_rate": 4.873187761954018e-07, "loss": 0.3141, "step": 3120 }, { "epoch": 2.61755661168577, "grad_norm": 0.3193779226159348, "learning_rate": 4.85218979432866e-07, "loss": 0.2767, "step": 3121 }, { "epoch": 2.6183953033268104, "grad_norm": 0.3399769800412699, "learning_rate": 4.831234856007372e-07, "loss": 0.3022, "step": 3122 }, { "epoch": 2.6192339949678503, "grad_norm": 0.3280770829399806, "learning_rate": 4.810322966961794e-07, "loss": 0.3145, "step": 3123 }, { "epoch": 2.62007268660889, "grad_norm": 0.30803414461991097, "learning_rate": 4.789454147122552e-07, "loss": 0.3124, "step": 3124 }, { "epoch": 2.62091137824993, "grad_norm": 0.3228841527557263, "learning_rate": 4.768628416379222e-07, "loss": 0.2693, "step": 3125 }, { "epoch": 2.62175006989097, "grad_norm": 0.3436723446871925, "learning_rate": 4.747845794580325e-07, "loss": 0.296, "step": 3126 }, { "epoch": 2.6225887615320103, "grad_norm": 0.32855107799918065, "learning_rate": 4.7271063015332743e-07, "loss": 0.3081, "step": 3127 }, { "epoch": 2.62342745317305, "grad_norm": 0.3236404832705607, "learning_rate": 4.706409957004382e-07, "loss": 0.3062, "step": 3128 }, { "epoch": 2.62426614481409, "grad_norm": 0.3274415916227704, "learning_rate": 4.6857567807188474e-07, "loss": 0.2982, "step": 3129 }, { "epoch": 2.62510483645513, "grad_norm": 0.3351990609402136, "learning_rate": 4.665146792360725e-07, "loss": 0.3171, "step": 3130 }, { "epoch": 2.62594352809617, "grad_norm": 0.3190649656554881, "learning_rate": 4.644580011572897e-07, "loss": 0.315, "step": 3131 }, { "epoch": 2.62678221973721, "grad_norm": 0.3211697601063691, "learning_rate": 4.624056457957077e-07, "loss": 0.2636, "step": 3132 }, { "epoch": 2.62762091137825, "grad_norm": 0.31268256061735394, "learning_rate": 4.6035761510737853e-07, "loss": 0.3092, "step": 3133 }, { "epoch": 2.62845960301929, "grad_norm": 0.3111538261448018, "learning_rate": 4.583139110442292e-07, "loss": 0.2818, "step": 3134 }, { "epoch": 2.62929829466033, "grad_norm": 0.33646769080109573, "learning_rate": 4.562745355540682e-07, "loss": 0.3366, "step": 3135 }, { "epoch": 2.6301369863013697, "grad_norm": 0.3492294994608449, "learning_rate": 4.5423949058057303e-07, "loss": 0.2897, "step": 3136 }, { "epoch": 2.63097567794241, "grad_norm": 0.3309352379047094, "learning_rate": 4.522087780632983e-07, "loss": 0.2832, "step": 3137 }, { "epoch": 2.63181436958345, "grad_norm": 0.32827892904984235, "learning_rate": 4.50182399937667e-07, "loss": 0.3042, "step": 3138 }, { "epoch": 2.63265306122449, "grad_norm": 0.3161964952103686, "learning_rate": 4.4816035813497304e-07, "loss": 0.2963, "step": 3139 }, { "epoch": 2.6334917528655297, "grad_norm": 0.31928617589633956, "learning_rate": 4.461426545823766e-07, "loss": 0.3036, "step": 3140 }, { "epoch": 2.6343304445065696, "grad_norm": 0.3213935050978242, "learning_rate": 4.4412929120290104e-07, "loss": 0.2882, "step": 3141 }, { "epoch": 2.63516913614761, "grad_norm": 0.33177998390539437, "learning_rate": 4.4212026991543643e-07, "loss": 0.2952, "step": 3142 }, { "epoch": 2.63600782778865, "grad_norm": 0.329353760564898, "learning_rate": 4.4011559263473335e-07, "loss": 0.2993, "step": 3143 }, { "epoch": 2.6368465194296897, "grad_norm": 0.332203859686989, "learning_rate": 4.3811526127140124e-07, "loss": 0.3153, "step": 3144 }, { "epoch": 2.6376852110707296, "grad_norm": 0.34331327369508124, "learning_rate": 4.3611927773190845e-07, "loss": 0.3316, "step": 3145 }, { "epoch": 2.6385239027117695, "grad_norm": 0.2924215067832511, "learning_rate": 4.34127643918581e-07, "loss": 0.291, "step": 3146 }, { "epoch": 2.63936259435281, "grad_norm": 0.30055713571988785, "learning_rate": 4.3214036172959494e-07, "loss": 0.2793, "step": 3147 }, { "epoch": 2.6402012859938497, "grad_norm": 0.33577169649022853, "learning_rate": 4.301574330589836e-07, "loss": 0.3283, "step": 3148 }, { "epoch": 2.6410399776348896, "grad_norm": 0.30402298953871143, "learning_rate": 4.28178859796628e-07, "loss": 0.2528, "step": 3149 }, { "epoch": 2.6418786692759295, "grad_norm": 0.3514336008829033, "learning_rate": 4.262046438282602e-07, "loss": 0.3132, "step": 3150 }, { "epoch": 2.6427173609169694, "grad_norm": 0.3320042477897673, "learning_rate": 4.2423478703545964e-07, "loss": 0.2999, "step": 3151 }, { "epoch": 2.6435560525580097, "grad_norm": 0.3259131873575711, "learning_rate": 4.2226929129564664e-07, "loss": 0.2962, "step": 3152 }, { "epoch": 2.6443947441990496, "grad_norm": 0.3148022683615486, "learning_rate": 4.2030815848209174e-07, "loss": 0.2927, "step": 3153 }, { "epoch": 2.6452334358400895, "grad_norm": 0.3213247926137648, "learning_rate": 4.1835139046390194e-07, "loss": 0.2713, "step": 3154 }, { "epoch": 2.6460721274811294, "grad_norm": 0.3463794848842325, "learning_rate": 4.163989891060266e-07, "loss": 0.3287, "step": 3155 }, { "epoch": 2.6469108191221693, "grad_norm": 0.3231684302306179, "learning_rate": 4.1445095626925445e-07, "loss": 0.2964, "step": 3156 }, { "epoch": 2.6477495107632096, "grad_norm": 0.3206502421793615, "learning_rate": 4.1250729381020703e-07, "loss": 0.3228, "step": 3157 }, { "epoch": 2.6485882024042495, "grad_norm": 0.30299947173457176, "learning_rate": 4.1056800358134366e-07, "loss": 0.2972, "step": 3158 }, { "epoch": 2.6494268940452894, "grad_norm": 0.3493749984143559, "learning_rate": 4.0863308743095687e-07, "loss": 0.311, "step": 3159 }, { "epoch": 2.6502655856863293, "grad_norm": 0.32001055543395995, "learning_rate": 4.067025472031677e-07, "loss": 0.2891, "step": 3160 }, { "epoch": 2.651104277327369, "grad_norm": 0.31671235544558574, "learning_rate": 4.047763847379288e-07, "loss": 0.2805, "step": 3161 }, { "epoch": 2.6519429689684095, "grad_norm": 0.3191044424272406, "learning_rate": 4.0285460187101997e-07, "loss": 0.3093, "step": 3162 }, { "epoch": 2.6527816606094494, "grad_norm": 0.32354968959909514, "learning_rate": 4.0093720043404615e-07, "loss": 0.2934, "step": 3163 }, { "epoch": 2.6536203522504893, "grad_norm": 0.323522279843869, "learning_rate": 3.990241822544383e-07, "loss": 0.3203, "step": 3164 }, { "epoch": 2.654459043891529, "grad_norm": 0.3275256210441705, "learning_rate": 3.971155491554468e-07, "loss": 0.2666, "step": 3165 }, { "epoch": 2.655297735532569, "grad_norm": 0.3432583056151737, "learning_rate": 3.9521130295614605e-07, "loss": 0.3019, "step": 3166 }, { "epoch": 2.6561364271736094, "grad_norm": 0.3239948253236677, "learning_rate": 3.9331144547142653e-07, "loss": 0.2986, "step": 3167 }, { "epoch": 2.656975118814649, "grad_norm": 0.327446987017783, "learning_rate": 3.914159785119981e-07, "loss": 0.2929, "step": 3168 }, { "epoch": 2.657813810455689, "grad_norm": 0.3289647299490689, "learning_rate": 3.8952490388438515e-07, "loss": 0.2929, "step": 3169 }, { "epoch": 2.658652502096729, "grad_norm": 0.30704340784470324, "learning_rate": 3.876382233909248e-07, "loss": 0.2505, "step": 3170 }, { "epoch": 2.659491193737769, "grad_norm": 0.32419478255573686, "learning_rate": 3.857559388297688e-07, "loss": 0.2915, "step": 3171 }, { "epoch": 2.6603298853788093, "grad_norm": 0.32585022428160143, "learning_rate": 3.838780519948776e-07, "loss": 0.3049, "step": 3172 }, { "epoch": 2.6611685770198488, "grad_norm": 0.34124290321297246, "learning_rate": 3.8200456467601953e-07, "loss": 0.2939, "step": 3173 }, { "epoch": 2.662007268660889, "grad_norm": 0.3424553214182437, "learning_rate": 3.801354786587713e-07, "loss": 0.2826, "step": 3174 }, { "epoch": 2.662845960301929, "grad_norm": 0.33751351212024844, "learning_rate": 3.7827079572451464e-07, "loss": 0.3235, "step": 3175 }, { "epoch": 2.663684651942969, "grad_norm": 0.32259362199352587, "learning_rate": 3.76410517650434e-07, "loss": 0.2879, "step": 3176 }, { "epoch": 2.664523343584009, "grad_norm": 0.3381506636411997, "learning_rate": 3.745546462095173e-07, "loss": 0.3207, "step": 3177 }, { "epoch": 2.6653620352250487, "grad_norm": 0.3006938243333928, "learning_rate": 3.7270318317054966e-07, "loss": 0.3044, "step": 3178 }, { "epoch": 2.666200726866089, "grad_norm": 0.3232237420170521, "learning_rate": 3.708561302981184e-07, "loss": 0.2922, "step": 3179 }, { "epoch": 2.667039418507129, "grad_norm": 0.31285788262934466, "learning_rate": 3.690134893526037e-07, "loss": 0.2694, "step": 3180 }, { "epoch": 2.667878110148169, "grad_norm": 0.33702585764325493, "learning_rate": 3.671752620901842e-07, "loss": 0.3101, "step": 3181 }, { "epoch": 2.6687168017892087, "grad_norm": 0.32814956422794544, "learning_rate": 3.653414502628311e-07, "loss": 0.2833, "step": 3182 }, { "epoch": 2.6695554934302486, "grad_norm": 0.3100289513727342, "learning_rate": 3.635120556183047e-07, "loss": 0.3028, "step": 3183 }, { "epoch": 2.670394185071289, "grad_norm": 0.3173246541318507, "learning_rate": 3.6168707990015874e-07, "loss": 0.319, "step": 3184 }, { "epoch": 2.671232876712329, "grad_norm": 0.3176116856309235, "learning_rate": 3.598665248477351e-07, "loss": 0.2906, "step": 3185 }, { "epoch": 2.6720715683533687, "grad_norm": 0.29702954337798665, "learning_rate": 3.5805039219615975e-07, "loss": 0.2755, "step": 3186 }, { "epoch": 2.6729102599944086, "grad_norm": 0.3335076335355301, "learning_rate": 3.562386836763465e-07, "loss": 0.3326, "step": 3187 }, { "epoch": 2.6737489516354485, "grad_norm": 0.3172454505460403, "learning_rate": 3.544314010149913e-07, "loss": 0.2844, "step": 3188 }, { "epoch": 2.674587643276489, "grad_norm": 0.3228639037997647, "learning_rate": 3.5262854593457297e-07, "loss": 0.3178, "step": 3189 }, { "epoch": 2.6754263349175287, "grad_norm": 0.3300277743982161, "learning_rate": 3.5083012015334985e-07, "loss": 0.2932, "step": 3190 }, { "epoch": 2.6762650265585686, "grad_norm": 0.3120033367973706, "learning_rate": 3.4903612538535693e-07, "loss": 0.2892, "step": 3191 }, { "epoch": 2.6771037181996085, "grad_norm": 0.3129496517035721, "learning_rate": 3.472465633404104e-07, "loss": 0.3217, "step": 3192 }, { "epoch": 2.6779424098406484, "grad_norm": 0.3384131664059605, "learning_rate": 3.4546143572409764e-07, "loss": 0.3038, "step": 3193 }, { "epoch": 2.6787811014816887, "grad_norm": 0.332710299738071, "learning_rate": 3.4368074423778154e-07, "loss": 0.2844, "step": 3194 }, { "epoch": 2.6796197931227286, "grad_norm": 0.33663334979009246, "learning_rate": 3.419044905785979e-07, "loss": 0.3055, "step": 3195 }, { "epoch": 2.6804584847637685, "grad_norm": 0.34328895657533637, "learning_rate": 3.4013267643945026e-07, "loss": 0.3095, "step": 3196 }, { "epoch": 2.6812971764048084, "grad_norm": 0.32876779095094305, "learning_rate": 3.383653035090134e-07, "loss": 0.3035, "step": 3197 }, { "epoch": 2.6821358680458482, "grad_norm": 0.3240927294015249, "learning_rate": 3.3660237347172886e-07, "loss": 0.2969, "step": 3198 }, { "epoch": 2.6829745596868886, "grad_norm": 0.3623726670078819, "learning_rate": 3.34843888007802e-07, "loss": 0.347, "step": 3199 }, { "epoch": 2.6838132513279285, "grad_norm": 0.3164703876573052, "learning_rate": 3.330898487932044e-07, "loss": 0.3045, "step": 3200 }, { "epoch": 2.6846519429689684, "grad_norm": 0.3259795757579387, "learning_rate": 3.313402574996688e-07, "loss": 0.2788, "step": 3201 }, { "epoch": 2.6854906346100083, "grad_norm": 0.3513675458268847, "learning_rate": 3.295951157946897e-07, "loss": 0.3184, "step": 3202 }, { "epoch": 2.686329326251048, "grad_norm": 0.3275122422957617, "learning_rate": 3.2785442534152e-07, "loss": 0.2975, "step": 3203 }, { "epoch": 2.6871680178920885, "grad_norm": 0.3460999646801403, "learning_rate": 3.261181877991693e-07, "loss": 0.2884, "step": 3204 }, { "epoch": 2.6880067095331284, "grad_norm": 0.33598035138521315, "learning_rate": 3.2438640482240603e-07, "loss": 0.3067, "step": 3205 }, { "epoch": 2.6888454011741683, "grad_norm": 0.31569247084110674, "learning_rate": 3.2265907806174944e-07, "loss": 0.293, "step": 3206 }, { "epoch": 2.689684092815208, "grad_norm": 0.3223230401714417, "learning_rate": 3.209362091634749e-07, "loss": 0.314, "step": 3207 }, { "epoch": 2.690522784456248, "grad_norm": 0.34498400790516426, "learning_rate": 3.1921779976960776e-07, "loss": 0.325, "step": 3208 }, { "epoch": 2.6913614760972884, "grad_norm": 0.3200115281495064, "learning_rate": 3.175038515179224e-07, "loss": 0.2844, "step": 3209 }, { "epoch": 2.6922001677383283, "grad_norm": 0.2899053933699016, "learning_rate": 3.157943660419421e-07, "loss": 0.2744, "step": 3210 }, { "epoch": 2.693038859379368, "grad_norm": 0.30939636046218766, "learning_rate": 3.140893449709376e-07, "loss": 0.344, "step": 3211 }, { "epoch": 2.693877551020408, "grad_norm": 0.3269282522185548, "learning_rate": 3.123887899299233e-07, "loss": 0.3046, "step": 3212 }, { "epoch": 2.694716242661448, "grad_norm": 0.31507182437284775, "learning_rate": 3.1069270253965887e-07, "loss": 0.2843, "step": 3213 }, { "epoch": 2.6955549343024883, "grad_norm": 0.3279728011674002, "learning_rate": 3.09001084416643e-07, "loss": 0.3055, "step": 3214 }, { "epoch": 2.696393625943528, "grad_norm": 0.3226377068757219, "learning_rate": 3.073139371731182e-07, "loss": 0.3153, "step": 3215 }, { "epoch": 2.697232317584568, "grad_norm": 0.33151019749619715, "learning_rate": 3.056312624170643e-07, "loss": 0.292, "step": 3216 }, { "epoch": 2.698071009225608, "grad_norm": 0.3444575583557559, "learning_rate": 3.039530617521974e-07, "loss": 0.319, "step": 3217 }, { "epoch": 2.698909700866648, "grad_norm": 0.31966378084646746, "learning_rate": 3.022793367779714e-07, "loss": 0.2953, "step": 3218 }, { "epoch": 2.699748392507688, "grad_norm": 0.33067156932892056, "learning_rate": 3.006100890895741e-07, "loss": 0.2908, "step": 3219 }, { "epoch": 2.700587084148728, "grad_norm": 0.3510450379810195, "learning_rate": 2.989453202779252e-07, "loss": 0.3345, "step": 3220 }, { "epoch": 2.701425775789768, "grad_norm": 0.3152662171544038, "learning_rate": 2.9728503192967717e-07, "loss": 0.289, "step": 3221 }, { "epoch": 2.702264467430808, "grad_norm": 0.32040660924542075, "learning_rate": 2.9562922562720974e-07, "loss": 0.3192, "step": 3222 }, { "epoch": 2.7031031590718477, "grad_norm": 0.33314251969218256, "learning_rate": 2.939779029486334e-07, "loss": 0.2949, "step": 3223 }, { "epoch": 2.703941850712888, "grad_norm": 0.31043314910160635, "learning_rate": 2.9233106546778466e-07, "loss": 0.2643, "step": 3224 }, { "epoch": 2.704780542353928, "grad_norm": 0.31469835960142645, "learning_rate": 2.906887147542253e-07, "loss": 0.2967, "step": 3225 }, { "epoch": 2.705619233994968, "grad_norm": 0.3242027568670005, "learning_rate": 2.89050852373241e-07, "loss": 0.2993, "step": 3226 }, { "epoch": 2.7064579256360077, "grad_norm": 0.32820972774255947, "learning_rate": 2.8741747988583815e-07, "loss": 0.3068, "step": 3227 }, { "epoch": 2.7072966172770476, "grad_norm": 0.3297062543168489, "learning_rate": 2.8578859884874577e-07, "loss": 0.2993, "step": 3228 }, { "epoch": 2.708135308918088, "grad_norm": 0.3336721328865324, "learning_rate": 2.841642108144127e-07, "loss": 0.294, "step": 3229 }, { "epoch": 2.708974000559128, "grad_norm": 0.32606026618489137, "learning_rate": 2.825443173310033e-07, "loss": 0.2966, "step": 3230 }, { "epoch": 2.7098126922001677, "grad_norm": 0.32134723786144787, "learning_rate": 2.809289199423998e-07, "loss": 0.2851, "step": 3231 }, { "epoch": 2.7106513838412076, "grad_norm": 0.32484705723438295, "learning_rate": 2.7931802018819956e-07, "loss": 0.3141, "step": 3232 }, { "epoch": 2.7114900754822475, "grad_norm": 0.31991610965485273, "learning_rate": 2.7771161960371163e-07, "loss": 0.293, "step": 3233 }, { "epoch": 2.712328767123288, "grad_norm": 0.34972071461374205, "learning_rate": 2.761097197199597e-07, "loss": 0.3205, "step": 3234 }, { "epoch": 2.7131674587643277, "grad_norm": 0.31748935812512064, "learning_rate": 2.7451232206367527e-07, "loss": 0.2908, "step": 3235 }, { "epoch": 2.7140061504053676, "grad_norm": 0.30650891791333923, "learning_rate": 2.729194281573e-07, "loss": 0.279, "step": 3236 }, { "epoch": 2.7148448420464075, "grad_norm": 0.3511260410594288, "learning_rate": 2.7133103951898386e-07, "loss": 0.2883, "step": 3237 }, { "epoch": 2.7156835336874474, "grad_norm": 0.3289356585161041, "learning_rate": 2.697471576625821e-07, "loss": 0.285, "step": 3238 }, { "epoch": 2.7165222253284877, "grad_norm": 0.3230619256637718, "learning_rate": 2.6816778409765597e-07, "loss": 0.3137, "step": 3239 }, { "epoch": 2.7173609169695276, "grad_norm": 0.3179702988766927, "learning_rate": 2.665929203294665e-07, "loss": 0.2734, "step": 3240 }, { "epoch": 2.7181996086105675, "grad_norm": 0.34702800613336215, "learning_rate": 2.6502256785898016e-07, "loss": 0.3192, "step": 3241 }, { "epoch": 2.7190383002516074, "grad_norm": 0.3115019599714067, "learning_rate": 2.634567281828632e-07, "loss": 0.2789, "step": 3242 }, { "epoch": 2.7198769918926473, "grad_norm": 0.3329190971227159, "learning_rate": 2.618954027934789e-07, "loss": 0.3066, "step": 3243 }, { "epoch": 2.7207156835336876, "grad_norm": 0.2973558660032995, "learning_rate": 2.6033859317888986e-07, "loss": 0.2975, "step": 3244 }, { "epoch": 2.7215543751747275, "grad_norm": 0.31646813385713707, "learning_rate": 2.5878630082285474e-07, "loss": 0.3054, "step": 3245 }, { "epoch": 2.7223930668157674, "grad_norm": 0.3253475748372713, "learning_rate": 2.572385272048256e-07, "loss": 0.3142, "step": 3246 }, { "epoch": 2.7232317584568073, "grad_norm": 0.311307050904298, "learning_rate": 2.556952737999496e-07, "loss": 0.3109, "step": 3247 }, { "epoch": 2.724070450097847, "grad_norm": 0.3243728987120656, "learning_rate": 2.541565420790643e-07, "loss": 0.3087, "step": 3248 }, { "epoch": 2.7249091417388875, "grad_norm": 0.3437823565288826, "learning_rate": 2.52622333508698e-07, "loss": 0.3235, "step": 3249 }, { "epoch": 2.7257478333799274, "grad_norm": 0.8338355552588022, "learning_rate": 2.510926495510685e-07, "loss": 0.2849, "step": 3250 }, { "epoch": 2.7265865250209673, "grad_norm": 0.33928864809586196, "learning_rate": 2.4956749166408165e-07, "loss": 0.3013, "step": 3251 }, { "epoch": 2.727425216662007, "grad_norm": 0.3053022688285966, "learning_rate": 2.480468613013298e-07, "loss": 0.2936, "step": 3252 }, { "epoch": 2.728263908303047, "grad_norm": 0.33280070270976336, "learning_rate": 2.4653075991208807e-07, "loss": 0.3095, "step": 3253 }, { "epoch": 2.7291025999440874, "grad_norm": 0.3275687577783252, "learning_rate": 2.4501918894131727e-07, "loss": 0.2856, "step": 3254 }, { "epoch": 2.7299412915851273, "grad_norm": 0.3262992717803603, "learning_rate": 2.435121498296605e-07, "loss": 0.2679, "step": 3255 }, { "epoch": 2.730779983226167, "grad_norm": 0.3417672444819672, "learning_rate": 2.420096440134395e-07, "loss": 0.2954, "step": 3256 }, { "epoch": 2.731618674867207, "grad_norm": 0.3229168457566975, "learning_rate": 2.4051167292465706e-07, "loss": 0.2886, "step": 3257 }, { "epoch": 2.732457366508247, "grad_norm": 0.3311102058215647, "learning_rate": 2.3901823799099556e-07, "loss": 0.3445, "step": 3258 }, { "epoch": 2.7332960581492873, "grad_norm": 0.33914470767007, "learning_rate": 2.375293406358098e-07, "loss": 0.2709, "step": 3259 }, { "epoch": 2.7341347497903272, "grad_norm": 0.3380923017995888, "learning_rate": 2.360449822781341e-07, "loss": 0.2845, "step": 3260 }, { "epoch": 2.734973441431367, "grad_norm": 0.2977338443554987, "learning_rate": 2.3456516433267462e-07, "loss": 0.2758, "step": 3261 }, { "epoch": 2.735812133072407, "grad_norm": 0.33202562839593464, "learning_rate": 2.3308988820981037e-07, "loss": 0.315, "step": 3262 }, { "epoch": 2.736650824713447, "grad_norm": 0.30883053220379486, "learning_rate": 2.3161915531559332e-07, "loss": 0.2937, "step": 3263 }, { "epoch": 2.7374895163544872, "grad_norm": 0.3040893978468574, "learning_rate": 2.301529670517416e-07, "loss": 0.2937, "step": 3264 }, { "epoch": 2.738328207995527, "grad_norm": 0.32092336517527587, "learning_rate": 2.2869132481564694e-07, "loss": 0.2992, "step": 3265 }, { "epoch": 2.739166899636567, "grad_norm": 0.32350199014819137, "learning_rate": 2.2723423000036382e-07, "loss": 0.2796, "step": 3266 }, { "epoch": 2.740005591277607, "grad_norm": 0.3559541188019134, "learning_rate": 2.2578168399461474e-07, "loss": 0.3386, "step": 3267 }, { "epoch": 2.740844282918647, "grad_norm": 0.30106228204299706, "learning_rate": 2.24333688182789e-07, "loss": 0.2475, "step": 3268 }, { "epoch": 2.741682974559687, "grad_norm": 0.335165644165402, "learning_rate": 2.2289024394493374e-07, "loss": 0.3207, "step": 3269 }, { "epoch": 2.742521666200727, "grad_norm": 0.3185314222331562, "learning_rate": 2.2145135265676353e-07, "loss": 0.2626, "step": 3270 }, { "epoch": 2.743360357841767, "grad_norm": 0.3309467256240872, "learning_rate": 2.200170156896514e-07, "loss": 0.2886, "step": 3271 }, { "epoch": 2.744199049482807, "grad_norm": 0.30460843830852247, "learning_rate": 2.185872344106288e-07, "loss": 0.2899, "step": 3272 }, { "epoch": 2.7450377411238467, "grad_norm": 0.30683255419830746, "learning_rate": 2.1716201018238626e-07, "loss": 0.2706, "step": 3273 }, { "epoch": 2.745876432764887, "grad_norm": 0.32922913228914336, "learning_rate": 2.1574134436327166e-07, "loss": 0.3185, "step": 3274 }, { "epoch": 2.746715124405927, "grad_norm": 0.33171509665127075, "learning_rate": 2.1432523830728746e-07, "loss": 0.2947, "step": 3275 }, { "epoch": 2.747553816046967, "grad_norm": 0.330147375004835, "learning_rate": 2.1291369336409184e-07, "loss": 0.2891, "step": 3276 }, { "epoch": 2.7483925076880067, "grad_norm": 0.3294945879647039, "learning_rate": 2.115067108789931e-07, "loss": 0.2877, "step": 3277 }, { "epoch": 2.7492311993290466, "grad_norm": 0.3572671199173335, "learning_rate": 2.1010429219295415e-07, "loss": 0.3251, "step": 3278 }, { "epoch": 2.750069890970087, "grad_norm": 0.318922018501111, "learning_rate": 2.0870643864258523e-07, "loss": 0.286, "step": 3279 }, { "epoch": 2.750908582611127, "grad_norm": 0.3464101605180342, "learning_rate": 2.073131515601484e-07, "loss": 0.3515, "step": 3280 }, { "epoch": 2.7517472742521667, "grad_norm": 0.31040838281927413, "learning_rate": 2.059244322735532e-07, "loss": 0.2598, "step": 3281 }, { "epoch": 2.7525859658932066, "grad_norm": 0.32599275083124096, "learning_rate": 2.0454028210635303e-07, "loss": 0.3061, "step": 3282 }, { "epoch": 2.7534246575342465, "grad_norm": 0.31004410944313415, "learning_rate": 2.0316070237774933e-07, "loss": 0.2817, "step": 3283 }, { "epoch": 2.754263349175287, "grad_norm": 0.30963764075648087, "learning_rate": 2.017856944025881e-07, "loss": 0.2852, "step": 3284 }, { "epoch": 2.7551020408163263, "grad_norm": 0.3410410898898821, "learning_rate": 2.0041525949135444e-07, "loss": 0.3321, "step": 3285 }, { "epoch": 2.7559407324573666, "grad_norm": 0.3185189305213528, "learning_rate": 1.990493989501785e-07, "loss": 0.3259, "step": 3286 }, { "epoch": 2.7567794240984065, "grad_norm": 0.33369182746951687, "learning_rate": 1.97688114080829e-07, "loss": 0.2908, "step": 3287 }, { "epoch": 2.7576181157394464, "grad_norm": 0.3113674134180221, "learning_rate": 1.9633140618071477e-07, "loss": 0.2766, "step": 3288 }, { "epoch": 2.7584568073804867, "grad_norm": 0.31899533762089594, "learning_rate": 1.9497927654288206e-07, "loss": 0.2953, "step": 3289 }, { "epoch": 2.759295499021526, "grad_norm": 0.3170130633475692, "learning_rate": 1.936317264560128e-07, "loss": 0.2984, "step": 3290 }, { "epoch": 2.7601341906625665, "grad_norm": 0.32338947931824186, "learning_rate": 1.9228875720442518e-07, "loss": 0.3079, "step": 3291 }, { "epoch": 2.7609728823036064, "grad_norm": 0.329454592068513, "learning_rate": 1.9095037006807092e-07, "loss": 0.2986, "step": 3292 }, { "epoch": 2.7618115739446463, "grad_norm": 0.34725224727355347, "learning_rate": 1.8961656632253523e-07, "loss": 0.2959, "step": 3293 }, { "epoch": 2.7626502655856866, "grad_norm": 0.3043051000462982, "learning_rate": 1.882873472390351e-07, "loss": 0.261, "step": 3294 }, { "epoch": 2.763488957226726, "grad_norm": 0.33054050657323514, "learning_rate": 1.8696271408441657e-07, "loss": 0.3101, "step": 3295 }, { "epoch": 2.7643276488677664, "grad_norm": 0.3260495321085508, "learning_rate": 1.856426681211565e-07, "loss": 0.2961, "step": 3296 }, { "epoch": 2.7651663405088063, "grad_norm": 0.322498691172558, "learning_rate": 1.843272106073607e-07, "loss": 0.3239, "step": 3297 }, { "epoch": 2.766005032149846, "grad_norm": 0.3149699223026519, "learning_rate": 1.830163427967585e-07, "loss": 0.2784, "step": 3298 }, { "epoch": 2.766843723790886, "grad_norm": 0.3216604685485779, "learning_rate": 1.8171006593870722e-07, "loss": 0.3239, "step": 3299 }, { "epoch": 2.767682415431926, "grad_norm": 0.31577235512387036, "learning_rate": 1.8040838127818872e-07, "loss": 0.2985, "step": 3300 }, { "epoch": 2.7685211070729663, "grad_norm": 0.32380188373263163, "learning_rate": 1.7911129005580728e-07, "loss": 0.3166, "step": 3301 }, { "epoch": 2.769359798714006, "grad_norm": 0.33104057071159126, "learning_rate": 1.7781879350779075e-07, "loss": 0.3056, "step": 3302 }, { "epoch": 2.770198490355046, "grad_norm": 0.3086825173739268, "learning_rate": 1.7653089286598535e-07, "loss": 0.32, "step": 3303 }, { "epoch": 2.771037181996086, "grad_norm": 0.29462076391559106, "learning_rate": 1.7524758935786036e-07, "loss": 0.2811, "step": 3304 }, { "epoch": 2.771875873637126, "grad_norm": 0.3184260309740772, "learning_rate": 1.739688842064996e-07, "loss": 0.2755, "step": 3305 }, { "epoch": 2.772714565278166, "grad_norm": 0.32852069341818935, "learning_rate": 1.7269477863060768e-07, "loss": 0.322, "step": 3306 }, { "epoch": 2.773553256919206, "grad_norm": 0.3102255230315522, "learning_rate": 1.7142527384450548e-07, "loss": 0.2741, "step": 3307 }, { "epoch": 2.774391948560246, "grad_norm": 0.31416420386465954, "learning_rate": 1.7016037105812566e-07, "loss": 0.281, "step": 3308 }, { "epoch": 2.775230640201286, "grad_norm": 0.3272360948918985, "learning_rate": 1.689000714770178e-07, "loss": 0.3095, "step": 3309 }, { "epoch": 2.7760693318423257, "grad_norm": 0.31533150178922625, "learning_rate": 1.6764437630234386e-07, "loss": 0.2871, "step": 3310 }, { "epoch": 2.776908023483366, "grad_norm": 0.331105573948361, "learning_rate": 1.6639328673087652e-07, "loss": 0.2881, "step": 3311 }, { "epoch": 2.777746715124406, "grad_norm": 0.3090697302257919, "learning_rate": 1.6514680395500026e-07, "loss": 0.2805, "step": 3312 }, { "epoch": 2.778585406765446, "grad_norm": 0.33405293572898137, "learning_rate": 1.639049291627076e-07, "loss": 0.3181, "step": 3313 }, { "epoch": 2.7794240984064857, "grad_norm": 0.32028942602651683, "learning_rate": 1.6266766353759956e-07, "loss": 0.2791, "step": 3314 }, { "epoch": 2.7802627900475256, "grad_norm": 0.3422466729172419, "learning_rate": 1.6143500825888614e-07, "loss": 0.2974, "step": 3315 }, { "epoch": 2.781101481688566, "grad_norm": 0.33148284286313145, "learning_rate": 1.602069645013793e-07, "loss": 0.2986, "step": 3316 }, { "epoch": 2.781940173329606, "grad_norm": 0.3336787254012109, "learning_rate": 1.5898353343550055e-07, "loss": 0.3079, "step": 3317 }, { "epoch": 2.7827788649706457, "grad_norm": 0.3043880177466048, "learning_rate": 1.5776471622727108e-07, "loss": 0.2875, "step": 3318 }, { "epoch": 2.7836175566116856, "grad_norm": 0.33899166285767257, "learning_rate": 1.5655051403831778e-07, "loss": 0.3026, "step": 3319 }, { "epoch": 2.7844562482527255, "grad_norm": 0.3005117197447964, "learning_rate": 1.553409280258683e-07, "loss": 0.2856, "step": 3320 }, { "epoch": 2.785294939893766, "grad_norm": 0.30638962493339295, "learning_rate": 1.5413595934274882e-07, "loss": 0.3178, "step": 3321 }, { "epoch": 2.7861336315348058, "grad_norm": 0.3251469733700978, "learning_rate": 1.5293560913738737e-07, "loss": 0.3033, "step": 3322 }, { "epoch": 2.7869723231758456, "grad_norm": 0.332228894036241, "learning_rate": 1.517398785538088e-07, "loss": 0.2875, "step": 3323 }, { "epoch": 2.7878110148168855, "grad_norm": 0.3447499994348311, "learning_rate": 1.5054876873163592e-07, "loss": 0.3169, "step": 3324 }, { "epoch": 2.7886497064579254, "grad_norm": 0.3155852608424504, "learning_rate": 1.4936228080608684e-07, "loss": 0.2847, "step": 3325 }, { "epoch": 2.7894883980989658, "grad_norm": 0.3415191115441621, "learning_rate": 1.4818041590797526e-07, "loss": 0.3203, "step": 3326 }, { "epoch": 2.7903270897400057, "grad_norm": 0.31183398468555346, "learning_rate": 1.4700317516370744e-07, "loss": 0.2939, "step": 3327 }, { "epoch": 2.7911657813810455, "grad_norm": 0.31385645246775407, "learning_rate": 1.4583055969528527e-07, "loss": 0.3066, "step": 3328 }, { "epoch": 2.7920044730220854, "grad_norm": 0.3315226876319382, "learning_rate": 1.446625706202992e-07, "loss": 0.3078, "step": 3329 }, { "epoch": 2.7928431646631253, "grad_norm": 0.3413836952669226, "learning_rate": 1.434992090519327e-07, "loss": 0.3162, "step": 3330 }, { "epoch": 2.7936818563041657, "grad_norm": 0.29495156550784674, "learning_rate": 1.4234047609895774e-07, "loss": 0.2594, "step": 3331 }, { "epoch": 2.7945205479452055, "grad_norm": 0.34363761079650085, "learning_rate": 1.4118637286573478e-07, "loss": 0.3025, "step": 3332 }, { "epoch": 2.7953592395862454, "grad_norm": 0.3153425942981362, "learning_rate": 1.4003690045221284e-07, "loss": 0.2886, "step": 3333 }, { "epoch": 2.7961979312272853, "grad_norm": 0.32413148997327046, "learning_rate": 1.3889205995392562e-07, "loss": 0.3146, "step": 3334 }, { "epoch": 2.797036622868325, "grad_norm": 0.31334176203665076, "learning_rate": 1.3775185246199474e-07, "loss": 0.2791, "step": 3335 }, { "epoch": 2.7978753145093656, "grad_norm": 0.3358831704919261, "learning_rate": 1.366162790631237e-07, "loss": 0.3103, "step": 3336 }, { "epoch": 2.7987140061504054, "grad_norm": 0.31077354418648245, "learning_rate": 1.3548534083960117e-07, "loss": 0.2923, "step": 3337 }, { "epoch": 2.7995526977914453, "grad_norm": 0.3262802459043552, "learning_rate": 1.343590388692978e-07, "loss": 0.3068, "step": 3338 }, { "epoch": 2.8003913894324852, "grad_norm": 0.299219561095277, "learning_rate": 1.332373742256643e-07, "loss": 0.2608, "step": 3339 }, { "epoch": 2.801230081073525, "grad_norm": 0.3249976761899166, "learning_rate": 1.3212034797773276e-07, "loss": 0.3187, "step": 3340 }, { "epoch": 2.8020687727145654, "grad_norm": 0.3322686030036161, "learning_rate": 1.310079611901155e-07, "loss": 0.2936, "step": 3341 }, { "epoch": 2.8029074643556053, "grad_norm": 0.3021542507120965, "learning_rate": 1.2990021492299997e-07, "loss": 0.2723, "step": 3342 }, { "epoch": 2.8037461559966452, "grad_norm": 0.3228292890801843, "learning_rate": 1.2879711023215446e-07, "loss": 0.2986, "step": 3343 }, { "epoch": 2.804584847637685, "grad_norm": 0.31984767781123075, "learning_rate": 1.2769864816892074e-07, "loss": 0.3173, "step": 3344 }, { "epoch": 2.805423539278725, "grad_norm": 0.2973240344465135, "learning_rate": 1.2660482978021748e-07, "loss": 0.2677, "step": 3345 }, { "epoch": 2.8062622309197653, "grad_norm": 0.3408471313775837, "learning_rate": 1.255156561085369e-07, "loss": 0.3428, "step": 3346 }, { "epoch": 2.8071009225608052, "grad_norm": 0.3293494082206107, "learning_rate": 1.2443112819194415e-07, "loss": 0.2884, "step": 3347 }, { "epoch": 2.807939614201845, "grad_norm": 0.307520823822904, "learning_rate": 1.2335124706407685e-07, "loss": 0.3065, "step": 3348 }, { "epoch": 2.808778305842885, "grad_norm": 0.29623896127695837, "learning_rate": 1.2227601375414456e-07, "loss": 0.2722, "step": 3349 }, { "epoch": 2.809616997483925, "grad_norm": 0.31671066841723666, "learning_rate": 1.2120542928692635e-07, "loss": 0.3012, "step": 3350 }, { "epoch": 2.8104556891249652, "grad_norm": 0.32326895161081376, "learning_rate": 1.2013949468277164e-07, "loss": 0.289, "step": 3351 }, { "epoch": 2.811294380766005, "grad_norm": 0.34941076887573247, "learning_rate": 1.1907821095759553e-07, "loss": 0.3173, "step": 3352 }, { "epoch": 2.812133072407045, "grad_norm": 0.3134282146114077, "learning_rate": 1.1802157912288392e-07, "loss": 0.2702, "step": 3353 }, { "epoch": 2.812971764048085, "grad_norm": 0.32973409924103075, "learning_rate": 1.1696960018568737e-07, "loss": 0.2882, "step": 3354 }, { "epoch": 2.813810455689125, "grad_norm": 0.32460066089040124, "learning_rate": 1.1592227514862054e-07, "loss": 0.3218, "step": 3355 }, { "epoch": 2.814649147330165, "grad_norm": 0.3358043331435576, "learning_rate": 1.1487960500986606e-07, "loss": 0.2771, "step": 3356 }, { "epoch": 2.815487838971205, "grad_norm": 0.3665280696059328, "learning_rate": 1.138415907631657e-07, "loss": 0.3151, "step": 3357 }, { "epoch": 2.816326530612245, "grad_norm": 0.3157903588513499, "learning_rate": 1.1280823339782809e-07, "loss": 0.278, "step": 3358 }, { "epoch": 2.817165222253285, "grad_norm": 0.3444608145258044, "learning_rate": 1.1177953389872043e-07, "loss": 0.2923, "step": 3359 }, { "epoch": 2.8180039138943247, "grad_norm": 0.32018594775517534, "learning_rate": 1.1075549324627289e-07, "loss": 0.3096, "step": 3360 }, { "epoch": 2.818842605535365, "grad_norm": 0.33524040775232883, "learning_rate": 1.0973611241647308e-07, "loss": 0.2612, "step": 3361 }, { "epoch": 2.819681297176405, "grad_norm": 0.3296225099858199, "learning_rate": 1.0872139238086943e-07, "loss": 0.2988, "step": 3362 }, { "epoch": 2.820519988817445, "grad_norm": 0.3228698294285759, "learning_rate": 1.0771133410656664e-07, "loss": 0.2902, "step": 3363 }, { "epoch": 2.8213586804584847, "grad_norm": 0.32300714969526806, "learning_rate": 1.0670593855622857e-07, "loss": 0.3003, "step": 3364 }, { "epoch": 2.8221973720995246, "grad_norm": 0.3477268210864378, "learning_rate": 1.057052066880726e-07, "loss": 0.3202, "step": 3365 }, { "epoch": 2.823036063740565, "grad_norm": 0.3200789565395022, "learning_rate": 1.0470913945587246e-07, "loss": 0.2982, "step": 3366 }, { "epoch": 2.823874755381605, "grad_norm": 0.32187243487856826, "learning_rate": 1.0371773780895711e-07, "loss": 0.2917, "step": 3367 }, { "epoch": 2.8247134470226447, "grad_norm": 0.32150703860080443, "learning_rate": 1.0273100269220681e-07, "loss": 0.3179, "step": 3368 }, { "epoch": 2.8255521386636846, "grad_norm": 0.3243974883188044, "learning_rate": 1.0174893504605543e-07, "loss": 0.3103, "step": 3369 }, { "epoch": 2.8263908303047245, "grad_norm": 0.312374229667224, "learning_rate": 1.007715358064898e-07, "loss": 0.2896, "step": 3370 }, { "epoch": 2.827229521945765, "grad_norm": 0.3294779885813399, "learning_rate": 9.979880590504365e-08, "loss": 0.3179, "step": 3371 }, { "epoch": 2.8280682135868047, "grad_norm": 0.3447140592178482, "learning_rate": 9.883074626880429e-08, "loss": 0.3096, "step": 3372 }, { "epoch": 2.8289069052278446, "grad_norm": 0.3038320180198954, "learning_rate": 9.786735782040591e-08, "loss": 0.2879, "step": 3373 }, { "epoch": 2.8297455968688845, "grad_norm": 0.3150643334507808, "learning_rate": 9.69086414780307e-08, "loss": 0.2945, "step": 3374 }, { "epoch": 2.8305842885099244, "grad_norm": 0.32445094392277535, "learning_rate": 9.595459815540942e-08, "loss": 0.3185, "step": 3375 }, { "epoch": 2.8314229801509647, "grad_norm": 0.3102431209976904, "learning_rate": 9.500522876181695e-08, "loss": 0.2858, "step": 3376 }, { "epoch": 2.8322616717920046, "grad_norm": 0.3105455213417175, "learning_rate": 9.406053420207562e-08, "loss": 0.3098, "step": 3377 }, { "epoch": 2.8331003634330445, "grad_norm": 0.31910644429422214, "learning_rate": 9.312051537655075e-08, "loss": 0.3038, "step": 3378 }, { "epoch": 2.8339390550740844, "grad_norm": 0.31654825504246825, "learning_rate": 9.218517318115128e-08, "loss": 0.3153, "step": 3379 }, { "epoch": 2.8347777467151243, "grad_norm": 0.33796691856741995, "learning_rate": 9.12545085073313e-08, "loss": 0.3142, "step": 3380 }, { "epoch": 2.8356164383561646, "grad_norm": 0.30292453334207875, "learning_rate": 9.032852224208411e-08, "loss": 0.2794, "step": 3381 }, { "epoch": 2.8364551299972045, "grad_norm": 0.30178079383696965, "learning_rate": 8.940721526794483e-08, "loss": 0.306, "step": 3382 }, { "epoch": 2.8372938216382444, "grad_norm": 0.3188308088794349, "learning_rate": 8.849058846299052e-08, "loss": 0.3016, "step": 3383 }, { "epoch": 2.8381325132792843, "grad_norm": 0.3309002849797072, "learning_rate": 8.75786427008346e-08, "loss": 0.2988, "step": 3384 }, { "epoch": 2.838971204920324, "grad_norm": 0.32285055646180727, "learning_rate": 8.667137885063236e-08, "loss": 0.279, "step": 3385 }, { "epoch": 2.8398098965613645, "grad_norm": 0.3237950359431971, "learning_rate": 8.576879777707492e-08, "loss": 0.3098, "step": 3386 }, { "epoch": 2.8406485882024044, "grad_norm": 0.33304565845681955, "learning_rate": 8.48709003403908e-08, "loss": 0.3365, "step": 3387 }, { "epoch": 2.8414872798434443, "grad_norm": 0.33087967086808473, "learning_rate": 8.397768739634493e-08, "loss": 0.2798, "step": 3388 }, { "epoch": 2.842325971484484, "grad_norm": 0.3476828429073353, "learning_rate": 8.308915979623689e-08, "loss": 0.3129, "step": 3389 }, { "epoch": 2.843164663125524, "grad_norm": 0.3428028121408357, "learning_rate": 8.220531838690205e-08, "loss": 0.2912, "step": 3390 }, { "epoch": 2.8440033547665644, "grad_norm": 0.30971903884386903, "learning_rate": 8.132616401070714e-08, "loss": 0.2959, "step": 3391 }, { "epoch": 2.8448420464076043, "grad_norm": 0.31693956842234794, "learning_rate": 8.045169750555415e-08, "loss": 0.3026, "step": 3392 }, { "epoch": 2.845680738048644, "grad_norm": 0.33230591967687545, "learning_rate": 7.958191970487694e-08, "loss": 0.3169, "step": 3393 }, { "epoch": 2.846519429689684, "grad_norm": 0.34326097273062034, "learning_rate": 7.871683143763908e-08, "loss": 0.3519, "step": 3394 }, { "epoch": 2.847358121330724, "grad_norm": 0.293333421474531, "learning_rate": 7.785643352833605e-08, "loss": 0.2483, "step": 3395 }, { "epoch": 2.8481968129717643, "grad_norm": 0.3208199893049244, "learning_rate": 7.7000726796993e-08, "loss": 0.312, "step": 3396 }, { "epoch": 2.8490355046128037, "grad_norm": 0.3418834130177901, "learning_rate": 7.614971205916256e-08, "loss": 0.2951, "step": 3397 }, { "epoch": 2.849874196253844, "grad_norm": 0.32661163739400245, "learning_rate": 7.530339012592702e-08, "loss": 0.3129, "step": 3398 }, { "epoch": 2.850712887894884, "grad_norm": 0.3274388391592157, "learning_rate": 7.44617618038962e-08, "loss": 0.2891, "step": 3399 }, { "epoch": 2.851551579535924, "grad_norm": 0.31905873387733036, "learning_rate": 7.362482789520619e-08, "loss": 0.3071, "step": 3400 }, { "epoch": 2.852390271176964, "grad_norm": 0.30182254382050566, "learning_rate": 7.279258919751841e-08, "loss": 0.2897, "step": 3401 }, { "epoch": 2.8532289628180036, "grad_norm": 0.32056084087726927, "learning_rate": 7.196504650401893e-08, "loss": 0.3034, "step": 3402 }, { "epoch": 2.854067654459044, "grad_norm": 0.3117403680529429, "learning_rate": 7.11422006034207e-08, "loss": 0.2935, "step": 3403 }, { "epoch": 2.854906346100084, "grad_norm": 0.318811882844781, "learning_rate": 7.032405227995754e-08, "loss": 0.257, "step": 3404 }, { "epoch": 2.8557450377411238, "grad_norm": 0.3235834881550163, "learning_rate": 6.95106023133868e-08, "loss": 0.3171, "step": 3405 }, { "epoch": 2.856583729382164, "grad_norm": 0.31504988527833305, "learning_rate": 6.870185147898945e-08, "loss": 0.2745, "step": 3406 }, { "epoch": 2.8574224210232035, "grad_norm": 0.3272855041741569, "learning_rate": 6.789780054756557e-08, "loss": 0.3247, "step": 3407 }, { "epoch": 2.858261112664244, "grad_norm": 0.3179593624782596, "learning_rate": 6.709845028543716e-08, "loss": 0.27, "step": 3408 }, { "epoch": 2.8590998043052838, "grad_norm": 0.31010572513632184, "learning_rate": 6.630380145444648e-08, "loss": 0.2772, "step": 3409 }, { "epoch": 2.8599384959463237, "grad_norm": 0.32748874397769717, "learning_rate": 6.551385481195437e-08, "loss": 0.3354, "step": 3410 }, { "epoch": 2.860777187587364, "grad_norm": 0.3274485587136846, "learning_rate": 6.472861111084027e-08, "loss": 0.3091, "step": 3411 }, { "epoch": 2.8616158792284034, "grad_norm": 0.3399628441345108, "learning_rate": 6.394807109950051e-08, "loss": 0.2812, "step": 3412 }, { "epoch": 2.8624545708694438, "grad_norm": 0.3410600876758744, "learning_rate": 6.317223552185003e-08, "loss": 0.3084, "step": 3413 }, { "epoch": 2.8632932625104837, "grad_norm": 0.3287587746024916, "learning_rate": 6.2401105117319e-08, "loss": 0.2789, "step": 3414 }, { "epoch": 2.8641319541515236, "grad_norm": 0.3226345182338136, "learning_rate": 6.163468062085288e-08, "loss": 0.3013, "step": 3415 }, { "epoch": 2.8649706457925634, "grad_norm": 0.33318379093160855, "learning_rate": 6.087296276291343e-08, "loss": 0.2985, "step": 3416 }, { "epoch": 2.8658093374336033, "grad_norm": 0.32631275577841645, "learning_rate": 6.011595226947553e-08, "loss": 0.2833, "step": 3417 }, { "epoch": 2.8666480290746437, "grad_norm": 0.33684100958833785, "learning_rate": 5.9363649862027586e-08, "loss": 0.3258, "step": 3418 }, { "epoch": 2.8674867207156836, "grad_norm": 0.30296123832097055, "learning_rate": 5.861605625757216e-08, "loss": 0.2839, "step": 3419 }, { "epoch": 2.8683254123567234, "grad_norm": 0.3345865391150336, "learning_rate": 5.7873172168621536e-08, "loss": 0.2998, "step": 3420 }, { "epoch": 2.8691641039977633, "grad_norm": 0.3391736929273414, "learning_rate": 5.713499830320157e-08, "loss": 0.2962, "step": 3421 }, { "epoch": 2.8700027956388032, "grad_norm": 0.31630867639602406, "learning_rate": 5.640153536484838e-08, "loss": 0.2588, "step": 3422 }, { "epoch": 2.8708414872798436, "grad_norm": 0.32172953204278987, "learning_rate": 5.567278405260779e-08, "loss": 0.2982, "step": 3423 }, { "epoch": 2.8716801789208835, "grad_norm": 0.31543653442592273, "learning_rate": 5.4948745061035314e-08, "loss": 0.3161, "step": 3424 }, { "epoch": 2.8725188705619233, "grad_norm": 0.31669871271303723, "learning_rate": 5.422941908019563e-08, "loss": 0.3132, "step": 3425 }, { "epoch": 2.8733575622029632, "grad_norm": 0.3112356069083607, "learning_rate": 5.3514806795660344e-08, "loss": 0.262, "step": 3426 }, { "epoch": 2.874196253844003, "grad_norm": 0.32010187849502314, "learning_rate": 5.2804908888510754e-08, "loss": 0.3296, "step": 3427 }, { "epoch": 2.8750349454850435, "grad_norm": 0.31991520895070275, "learning_rate": 5.209972603533286e-08, "loss": 0.2632, "step": 3428 }, { "epoch": 2.8758736371260833, "grad_norm": 0.36233544677251167, "learning_rate": 5.1399258908219615e-08, "loss": 0.3187, "step": 3429 }, { "epoch": 2.8767123287671232, "grad_norm": 0.31265756139291606, "learning_rate": 5.070350817476977e-08, "loss": 0.2981, "step": 3430 }, { "epoch": 2.877551020408163, "grad_norm": 0.2936332916677767, "learning_rate": 5.001247449808677e-08, "loss": 0.267, "step": 3431 }, { "epoch": 2.878389712049203, "grad_norm": 0.30937520905618054, "learning_rate": 4.93261585367788e-08, "loss": 0.3018, "step": 3432 }, { "epoch": 2.8792284036902434, "grad_norm": 0.32664469297236315, "learning_rate": 4.864456094495595e-08, "loss": 0.3388, "step": 3433 }, { "epoch": 2.8800670953312832, "grad_norm": 0.3084721961610264, "learning_rate": 4.796768237223415e-08, "loss": 0.2958, "step": 3434 }, { "epoch": 2.880905786972323, "grad_norm": 0.3298504258031435, "learning_rate": 4.729552346372957e-08, "loss": 0.3074, "step": 3435 }, { "epoch": 2.881744478613363, "grad_norm": 0.2921580768009581, "learning_rate": 4.6628084860060876e-08, "loss": 0.2723, "step": 3436 }, { "epoch": 2.882583170254403, "grad_norm": 0.31711543580881135, "learning_rate": 4.596536719734812e-08, "loss": 0.3293, "step": 3437 }, { "epoch": 2.8834218618954433, "grad_norm": 0.31910456585447267, "learning_rate": 4.530737110721162e-08, "loss": 0.3092, "step": 3438 }, { "epoch": 2.884260553536483, "grad_norm": 0.30448158259588864, "learning_rate": 4.465409721677194e-08, "loss": 0.3012, "step": 3439 }, { "epoch": 2.885099245177523, "grad_norm": 0.3033212618185904, "learning_rate": 4.4005546148649383e-08, "loss": 0.3139, "step": 3440 }, { "epoch": 2.885937936818563, "grad_norm": 0.340032690080538, "learning_rate": 4.336171852096116e-08, "loss": 0.3308, "step": 3441 }, { "epoch": 2.886776628459603, "grad_norm": 0.3098050767869641, "learning_rate": 4.272261494732532e-08, "loss": 0.2606, "step": 3442 }, { "epoch": 2.887615320100643, "grad_norm": 0.3496882068909074, "learning_rate": 4.208823603685574e-08, "loss": 0.3243, "step": 3443 }, { "epoch": 2.888454011741683, "grad_norm": 0.3337123375869265, "learning_rate": 4.1458582394163784e-08, "loss": 0.3113, "step": 3444 }, { "epoch": 2.889292703382723, "grad_norm": 0.32305269544936255, "learning_rate": 4.083365461935773e-08, "loss": 0.3148, "step": 3445 }, { "epoch": 2.890131395023763, "grad_norm": 0.3208801139338084, "learning_rate": 4.0213453308040604e-08, "loss": 0.273, "step": 3446 }, { "epoch": 2.8909700866648027, "grad_norm": 0.3218630115279472, "learning_rate": 3.959797905131291e-08, "loss": 0.3342, "step": 3447 }, { "epoch": 2.891808778305843, "grad_norm": 0.3401779924435694, "learning_rate": 3.8987232435767076e-08, "loss": 0.3157, "step": 3448 }, { "epoch": 2.892647469946883, "grad_norm": 0.32792717084309064, "learning_rate": 3.838121404349193e-08, "loss": 0.3017, "step": 3449 }, { "epoch": 2.893486161587923, "grad_norm": 0.3391245955419882, "learning_rate": 3.777992445206935e-08, "loss": 0.3086, "step": 3450 }, { "epoch": 2.8943248532289627, "grad_norm": 0.3256904768829621, "learning_rate": 3.718336423457369e-08, "loss": 0.3019, "step": 3451 }, { "epoch": 2.8951635448700026, "grad_norm": 0.3270330371880087, "learning_rate": 3.6591533959572935e-08, "loss": 0.2953, "step": 3452 }, { "epoch": 2.896002236511043, "grad_norm": 0.2913898634963861, "learning_rate": 3.600443419112532e-08, "loss": 0.2609, "step": 3453 }, { "epoch": 2.896840928152083, "grad_norm": 0.34006456222969544, "learning_rate": 3.542206548878269e-08, "loss": 0.3689, "step": 3454 }, { "epoch": 2.8976796197931227, "grad_norm": 0.28612170623779465, "learning_rate": 3.484442840758662e-08, "loss": 0.2646, "step": 3455 }, { "epoch": 2.8985183114341626, "grad_norm": 0.316506026800336, "learning_rate": 3.4271523498068946e-08, "loss": 0.3181, "step": 3456 }, { "epoch": 2.8993570030752025, "grad_norm": 0.30070027375285724, "learning_rate": 3.37033513062518e-08, "loss": 0.2644, "step": 3457 }, { "epoch": 2.900195694716243, "grad_norm": 0.3374173307012074, "learning_rate": 3.3139912373646445e-08, "loss": 0.338, "step": 3458 }, { "epoch": 2.9010343863572827, "grad_norm": 0.3068929022476151, "learning_rate": 3.2581207237253884e-08, "loss": 0.2982, "step": 3459 }, { "epoch": 2.9018730779983226, "grad_norm": 0.3245402838679383, "learning_rate": 3.202723642956151e-08, "loss": 0.3349, "step": 3460 }, { "epoch": 2.9027117696393625, "grad_norm": 0.3171863174970875, "learning_rate": 3.1478000478546986e-08, "loss": 0.3053, "step": 3461 }, { "epoch": 2.9035504612804024, "grad_norm": 0.32992060111116767, "learning_rate": 3.093349990767269e-08, "loss": 0.3221, "step": 3462 }, { "epoch": 2.9043891529214427, "grad_norm": 0.31530053857169815, "learning_rate": 3.0393735235890734e-08, "loss": 0.3181, "step": 3463 }, { "epoch": 2.9052278445624826, "grad_norm": 0.3347717627254626, "learning_rate": 2.985870697763682e-08, "loss": 0.3103, "step": 3464 }, { "epoch": 2.9060665362035225, "grad_norm": 0.34631467545811984, "learning_rate": 2.932841564283362e-08, "loss": 0.294, "step": 3465 }, { "epoch": 2.9069052278445624, "grad_norm": 0.3287211650194993, "learning_rate": 2.8802861736890175e-08, "loss": 0.2989, "step": 3466 }, { "epoch": 2.9077439194856023, "grad_norm": 0.3247491608444035, "learning_rate": 2.828204576069804e-08, "loss": 0.2922, "step": 3467 }, { "epoch": 2.9085826111266426, "grad_norm": 0.33351385841807213, "learning_rate": 2.7765968210635708e-08, "loss": 0.3075, "step": 3468 }, { "epoch": 2.9094213027676825, "grad_norm": 0.3015039311086779, "learning_rate": 2.7254629578564173e-08, "loss": 0.2774, "step": 3469 }, { "epoch": 2.9102599944087224, "grad_norm": 0.3101447422549914, "learning_rate": 2.674803035182749e-08, "loss": 0.3293, "step": 3470 }, { "epoch": 2.9110986860497623, "grad_norm": 0.3122093924718378, "learning_rate": 2.6246171013253886e-08, "loss": 0.2677, "step": 3471 }, { "epoch": 2.911937377690802, "grad_norm": 0.32964413633335193, "learning_rate": 2.574905204115352e-08, "loss": 0.2982, "step": 3472 }, { "epoch": 2.9127760693318425, "grad_norm": 0.31947364170631903, "learning_rate": 2.525667390931852e-08, "loss": 0.3061, "step": 3473 }, { "epoch": 2.9136147609728824, "grad_norm": 0.3199199362942566, "learning_rate": 2.476903708702294e-08, "loss": 0.2709, "step": 3474 }, { "epoch": 2.9144534526139223, "grad_norm": 0.31983294903143944, "learning_rate": 2.428614203902169e-08, "loss": 0.3025, "step": 3475 }, { "epoch": 2.915292144254962, "grad_norm": 0.339834729771881, "learning_rate": 2.3807989225551054e-08, "loss": 0.2985, "step": 3476 }, { "epoch": 2.916130835896002, "grad_norm": 0.3016227573085241, "learning_rate": 2.333457910232595e-08, "loss": 0.2811, "step": 3477 }, { "epoch": 2.9169695275370424, "grad_norm": 0.322342325659821, "learning_rate": 2.2865912120543234e-08, "loss": 0.2938, "step": 3478 }, { "epoch": 2.9178082191780823, "grad_norm": 0.35002295596994626, "learning_rate": 2.2401988726878376e-08, "loss": 0.3258, "step": 3479 }, { "epoch": 2.918646910819122, "grad_norm": 0.31915569404888855, "learning_rate": 2.1942809363484918e-08, "loss": 0.271, "step": 3480 }, { "epoch": 2.919485602460162, "grad_norm": 0.32982121736422787, "learning_rate": 2.1488374467996675e-08, "loss": 0.2966, "step": 3481 }, { "epoch": 2.920324294101202, "grad_norm": 0.33666102592770325, "learning_rate": 2.1038684473523862e-08, "loss": 0.3291, "step": 3482 }, { "epoch": 2.9211629857422423, "grad_norm": 0.29725613757324154, "learning_rate": 2.059373980865642e-08, "loss": 0.2692, "step": 3483 }, { "epoch": 2.922001677383282, "grad_norm": 0.31626264328298237, "learning_rate": 2.0153540897459024e-08, "loss": 0.306, "step": 3484 }, { "epoch": 2.922840369024322, "grad_norm": 0.31774474920732676, "learning_rate": 1.9718088159476068e-08, "loss": 0.282, "step": 3485 }, { "epoch": 2.923679060665362, "grad_norm": 0.3432869298508989, "learning_rate": 1.9287382009726686e-08, "loss": 0.3306, "step": 3486 }, { "epoch": 2.924517752306402, "grad_norm": 0.3356637276020508, "learning_rate": 1.886142285870751e-08, "loss": 0.3022, "step": 3487 }, { "epoch": 2.925356443947442, "grad_norm": 0.3252871289035269, "learning_rate": 1.8440211112388252e-08, "loss": 0.2769, "step": 3488 }, { "epoch": 2.926195135588482, "grad_norm": 0.32740523956467343, "learning_rate": 1.8023747172217775e-08, "loss": 0.2859, "step": 3489 }, { "epoch": 2.927033827229522, "grad_norm": 0.3221799244325682, "learning_rate": 1.761203143511636e-08, "loss": 0.3227, "step": 3490 }, { "epoch": 2.927872518870562, "grad_norm": 0.3125604552779726, "learning_rate": 1.7205064293481787e-08, "loss": 0.2611, "step": 3491 }, { "epoch": 2.9287112105116018, "grad_norm": 0.31111651236082255, "learning_rate": 1.6802846135183794e-08, "loss": 0.3108, "step": 3492 }, { "epoch": 2.929549902152642, "grad_norm": 0.31316769747977286, "learning_rate": 1.6405377343567398e-08, "loss": 0.3236, "step": 3493 }, { "epoch": 2.930388593793682, "grad_norm": 0.3237683287010528, "learning_rate": 1.601265829745069e-08, "loss": 0.3177, "step": 3494 }, { "epoch": 2.931227285434722, "grad_norm": 0.32172196270179637, "learning_rate": 1.562468937112427e-08, "loss": 0.3154, "step": 3495 }, { "epoch": 2.9320659770757618, "grad_norm": 0.3240257413689657, "learning_rate": 1.524147093435291e-08, "loss": 0.2887, "step": 3496 }, { "epoch": 2.9329046687168017, "grad_norm": 0.3226013616467527, "learning_rate": 1.4863003352372229e-08, "loss": 0.297, "step": 3497 }, { "epoch": 2.933743360357842, "grad_norm": 0.3066512419670944, "learning_rate": 1.448928698589147e-08, "loss": 0.3025, "step": 3498 }, { "epoch": 2.934582051998882, "grad_norm": 0.3228976683701698, "learning_rate": 1.4120322191090163e-08, "loss": 0.3057, "step": 3499 }, { "epoch": 2.935420743639922, "grad_norm": 0.32268206595099863, "learning_rate": 1.3756109319620348e-08, "loss": 0.295, "step": 3500 }, { "epoch": 2.9362594352809617, "grad_norm": 0.3328385501164705, "learning_rate": 1.3396648718604355e-08, "loss": 0.2496, "step": 3501 }, { "epoch": 2.9370981269220016, "grad_norm": 0.3415493674949276, "learning_rate": 1.3041940730635361e-08, "loss": 0.3349, "step": 3502 }, { "epoch": 2.937936818563042, "grad_norm": 0.30857930948188983, "learning_rate": 1.2691985693777387e-08, "loss": 0.2925, "step": 3503 }, { "epoch": 2.938775510204082, "grad_norm": 0.28632070166915047, "learning_rate": 1.2346783941564744e-08, "loss": 0.2857, "step": 3504 }, { "epoch": 2.9396142018451217, "grad_norm": 0.3302615930339971, "learning_rate": 1.2006335802999813e-08, "loss": 0.2702, "step": 3505 }, { "epoch": 2.9404528934861616, "grad_norm": 0.334132720492708, "learning_rate": 1.1670641602556932e-08, "loss": 0.293, "step": 3506 }, { "epoch": 2.9412915851272015, "grad_norm": 0.34262151175899686, "learning_rate": 1.1339701660177393e-08, "loss": 0.2942, "step": 3507 }, { "epoch": 2.942130276768242, "grad_norm": 0.3314231790279173, "learning_rate": 1.101351629127223e-08, "loss": 0.2705, "step": 3508 }, { "epoch": 2.9429689684092817, "grad_norm": 0.3582255336633891, "learning_rate": 1.0692085806721097e-08, "loss": 0.3491, "step": 3509 }, { "epoch": 2.9438076600503216, "grad_norm": 0.33236590861665466, "learning_rate": 1.037541051287172e-08, "loss": 0.3095, "step": 3510 }, { "epoch": 2.9446463516913615, "grad_norm": 0.29841379893779024, "learning_rate": 1.0063490711540446e-08, "loss": 0.2735, "step": 3511 }, { "epoch": 2.9454850433324014, "grad_norm": 0.33478528655749923, "learning_rate": 9.756326700009478e-09, "loss": 0.3141, "step": 3512 }, { "epoch": 2.9463237349734417, "grad_norm": 0.34054109442575187, "learning_rate": 9.45391877103019e-09, "loss": 0.2859, "step": 3513 }, { "epoch": 2.947162426614481, "grad_norm": 0.3212703101244818, "learning_rate": 9.156267212820368e-09, "loss": 0.2875, "step": 3514 }, { "epoch": 2.9480011182555215, "grad_norm": 0.3136650237734106, "learning_rate": 8.863372309064756e-09, "loss": 0.3101, "step": 3515 }, { "epoch": 2.9488398098965614, "grad_norm": 0.30130429671973946, "learning_rate": 8.57523433891394e-09, "loss": 0.2793, "step": 3516 }, { "epoch": 2.9496785015376012, "grad_norm": 0.3331311602287532, "learning_rate": 8.291853576986031e-09, "loss": 0.3194, "step": 3517 }, { "epoch": 2.9505171931786416, "grad_norm": 0.32544375657716007, "learning_rate": 8.013230293363872e-09, "loss": 0.2828, "step": 3518 }, { "epoch": 2.951355884819681, "grad_norm": 0.3203600412931918, "learning_rate": 7.739364753597267e-09, "loss": 0.2935, "step": 3519 }, { "epoch": 2.9521945764607214, "grad_norm": 0.35667488415522963, "learning_rate": 7.470257218700761e-09, "loss": 0.3091, "step": 3520 }, { "epoch": 2.9530332681017613, "grad_norm": 0.30522384369819927, "learning_rate": 7.205907945153634e-09, "loss": 0.2969, "step": 3521 }, { "epoch": 2.953871959742801, "grad_norm": 0.3098030592405468, "learning_rate": 6.946317184902129e-09, "loss": 0.2795, "step": 3522 }, { "epoch": 2.9547106513838415, "grad_norm": 0.3363041410913813, "learning_rate": 6.6914851853555615e-09, "loss": 0.3019, "step": 3523 }, { "epoch": 2.955549343024881, "grad_norm": 0.3196469410659631, "learning_rate": 6.441412189387985e-09, "loss": 0.2942, "step": 3524 }, { "epoch": 2.9563880346659213, "grad_norm": 0.3144496546957473, "learning_rate": 6.1960984353376385e-09, "loss": 0.2868, "step": 3525 }, { "epoch": 2.957226726306961, "grad_norm": 0.33747475556118517, "learning_rate": 5.955544157008053e-09, "loss": 0.3025, "step": 3526 }, { "epoch": 2.958065417948001, "grad_norm": 0.3319948559387743, "learning_rate": 5.7197495836658345e-09, "loss": 0.3182, "step": 3527 }, { "epoch": 2.958904109589041, "grad_norm": 0.32597947366545954, "learning_rate": 5.488714940040662e-09, "loss": 0.3152, "step": 3528 }, { "epoch": 2.959742801230081, "grad_norm": 0.2992845670185653, "learning_rate": 5.2624404463264e-09, "loss": 0.2834, "step": 3529 }, { "epoch": 2.960581492871121, "grad_norm": 0.34559899071285965, "learning_rate": 5.040926318179429e-09, "loss": 0.3179, "step": 3530 }, { "epoch": 2.961420184512161, "grad_norm": 0.3119735653169502, "learning_rate": 4.824172766720314e-09, "loss": 0.2664, "step": 3531 }, { "epoch": 2.962258876153201, "grad_norm": 0.326672610108967, "learning_rate": 4.61217999853103e-09, "loss": 0.3139, "step": 3532 }, { "epoch": 2.963097567794241, "grad_norm": 0.3184407309269851, "learning_rate": 4.404948215657179e-09, "loss": 0.2651, "step": 3533 }, { "epoch": 2.9639362594352807, "grad_norm": 0.33214042553153783, "learning_rate": 4.202477615606882e-09, "loss": 0.2955, "step": 3534 }, { "epoch": 2.964774951076321, "grad_norm": 0.31908699226460374, "learning_rate": 4.004768391349112e-09, "loss": 0.285, "step": 3535 }, { "epoch": 2.965613642717361, "grad_norm": 0.3355308664569336, "learning_rate": 3.811820731317029e-09, "loss": 0.2885, "step": 3536 }, { "epoch": 2.966452334358401, "grad_norm": 0.329427681927574, "learning_rate": 3.623634819403532e-09, "loss": 0.2983, "step": 3537 }, { "epoch": 2.9672910259994407, "grad_norm": 0.3292420399454674, "learning_rate": 3.4402108349640418e-09, "loss": 0.2996, "step": 3538 }, { "epoch": 2.9681297176404806, "grad_norm": 0.31422299550364885, "learning_rate": 3.2615489528159406e-09, "loss": 0.2776, "step": 3539 }, { "epoch": 2.968968409281521, "grad_norm": 0.3080286844356622, "learning_rate": 3.0876493432380193e-09, "loss": 0.2845, "step": 3540 }, { "epoch": 2.969807100922561, "grad_norm": 0.3056997141347387, "learning_rate": 2.9185121719688127e-09, "loss": 0.2932, "step": 3541 }, { "epoch": 2.9706457925636007, "grad_norm": 0.321857371193236, "learning_rate": 2.7541376002099275e-09, "loss": 0.3062, "step": 3542 }, { "epoch": 2.9714844842046406, "grad_norm": 0.3157666090878027, "learning_rate": 2.5945257846227145e-09, "loss": 0.3059, "step": 3543 }, { "epoch": 2.9723231758456805, "grad_norm": 0.3314083811604011, "learning_rate": 2.4396768773288226e-09, "loss": 0.2918, "step": 3544 }, { "epoch": 2.973161867486721, "grad_norm": 0.3149457816074819, "learning_rate": 2.2895910259118637e-09, "loss": 0.2852, "step": 3545 }, { "epoch": 2.9740005591277607, "grad_norm": 0.33467294802121833, "learning_rate": 2.144268373414082e-09, "loss": 0.3181, "step": 3546 }, { "epoch": 2.9748392507688006, "grad_norm": 0.3308663575196361, "learning_rate": 2.0037090583402417e-09, "loss": 0.3147, "step": 3547 }, { "epoch": 2.9756779424098405, "grad_norm": 0.3201265220286845, "learning_rate": 1.867913214652628e-09, "loss": 0.303, "step": 3548 }, { "epoch": 2.9765166340508804, "grad_norm": 0.33774701702685195, "learning_rate": 1.7368809717766e-09, "loss": 0.3112, "step": 3549 }, { "epoch": 2.9773553256919207, "grad_norm": 0.3120952416971821, "learning_rate": 1.6106124545950396e-09, "loss": 0.3124, "step": 3550 }, { "epoch": 2.9781940173329606, "grad_norm": 0.3372923960212687, "learning_rate": 1.4891077834511268e-09, "loss": 0.3043, "step": 3551 }, { "epoch": 2.9790327089740005, "grad_norm": 0.31963691644380765, "learning_rate": 1.3723670741488948e-09, "loss": 0.3051, "step": 3552 }, { "epoch": 2.9798714006150404, "grad_norm": 0.3231137410126566, "learning_rate": 1.260390437950454e-09, "loss": 0.3123, "step": 3553 }, { "epoch": 2.9807100922560803, "grad_norm": 0.3194014479345862, "learning_rate": 1.1531779815787681e-09, "loss": 0.3116, "step": 3554 }, { "epoch": 2.9815487838971206, "grad_norm": 0.3044677738879503, "learning_rate": 1.0507298072143234e-09, "loss": 0.2727, "step": 3555 }, { "epoch": 2.9823874755381605, "grad_norm": 0.3343865021508516, "learning_rate": 9.5304601249957e-10, "loss": 0.322, "step": 3556 }, { "epoch": 2.9832261671792004, "grad_norm": 0.31564986717215504, "learning_rate": 8.601266905333694e-10, "loss": 0.3053, "step": 3557 }, { "epoch": 2.9840648588202403, "grad_norm": 0.33553027566866467, "learning_rate": 7.719719298754369e-10, "loss": 0.2959, "step": 3558 }, { "epoch": 2.98490355046128, "grad_norm": 0.3122679414624867, "learning_rate": 6.885818145441203e-10, "loss": 0.2922, "step": 3559 }, { "epoch": 2.9857422421023205, "grad_norm": 0.3204361731650129, "learning_rate": 6.099564240164002e-10, "loss": 0.3131, "step": 3560 }, { "epoch": 2.9865809337433604, "grad_norm": 0.318112055824983, "learning_rate": 5.360958332278898e-10, "loss": 0.2855, "step": 3561 }, { "epoch": 2.9874196253844003, "grad_norm": 0.3090895704259432, "learning_rate": 4.670001125739454e-10, "loss": 0.26, "step": 3562 }, { "epoch": 2.98825831702544, "grad_norm": 0.31087823888051874, "learning_rate": 4.026693279074456e-10, "loss": 0.2951, "step": 3563 }, { "epoch": 2.98909700866648, "grad_norm": 0.2954851831566492, "learning_rate": 3.4310354054101214e-10, "loss": 0.3091, "step": 3564 }, { "epoch": 2.9899357003075204, "grad_norm": 0.32487451259662947, "learning_rate": 2.883028072453442e-10, "loss": 0.33, "step": 3565 }, { "epoch": 2.9907743919485603, "grad_norm": 0.3171659252180118, "learning_rate": 2.382671802497738e-10, "loss": 0.2989, "step": 3566 }, { "epoch": 2.9916130835896, "grad_norm": 0.3294883897935688, "learning_rate": 1.929967072417105e-10, "loss": 0.3185, "step": 3567 }, { "epoch": 2.99245177523064, "grad_norm": 0.31252573771596226, "learning_rate": 1.524914313677517e-10, "loss": 0.3013, "step": 3568 }, { "epoch": 2.99329046687168, "grad_norm": 0.30345391242083675, "learning_rate": 1.1675139123201728e-10, "loss": 0.2772, "step": 3569 }, { "epoch": 2.9941291585127203, "grad_norm": 0.3086974462022825, "learning_rate": 8.577662089837013e-11, "loss": 0.2913, "step": 3570 }, { "epoch": 2.99496785015376, "grad_norm": 0.3218729043399233, "learning_rate": 5.956714988708534e-11, "loss": 0.314, "step": 3571 }, { "epoch": 2.9958065417948, "grad_norm": 0.3129955766964488, "learning_rate": 3.812300317818096e-11, "loss": 0.2836, "step": 3572 }, { "epoch": 2.99664523343584, "grad_norm": 0.3337673858564134, "learning_rate": 2.1444201209752657e-11, "loss": 0.2811, "step": 3573 }, { "epoch": 2.99748392507688, "grad_norm": 0.3204178139505855, "learning_rate": 9.530759877973695e-12, "loss": 0.3011, "step": 3574 }, { "epoch": 2.99832261671792, "grad_norm": 0.2926242564667438, "learning_rate": 2.382690537094945e-12, "loss": 0.2746, "step": 3575 }, { "epoch": 2.99916130835896, "grad_norm": 0.31340191279463797, "learning_rate": 0.0, "loss": 0.3175, "step": 3576 }, { "epoch": 2.99916130835896, "step": 3576, "total_flos": 2855994935508992.0, "train_loss": 0.3465774535874459, "train_runtime": 45650.2365, "train_samples_per_second": 7.522, "train_steps_per_second": 0.078 } ], "logging_steps": 1, "max_steps": 3576, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2855994935508992.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }