{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 0, "global_step": 7474, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001337971635001338, "grad_norm": 153.94268798828125, "learning_rate": 9.998662028364998e-07, "loss": 0.7547, "step": 1 }, { "epoch": 0.0002675943270002676, "grad_norm": 156.0087890625, "learning_rate": 9.997324056729997e-07, "loss": 0.4447, "step": 2 }, { "epoch": 0.0004013914905004014, "grad_norm": 126.08484649658203, "learning_rate": 9.995986085094996e-07, "loss": 0.5753, "step": 3 }, { "epoch": 0.0005351886540005352, "grad_norm": 220.88833618164062, "learning_rate": 9.994648113459995e-07, "loss": 0.9113, "step": 4 }, { "epoch": 0.000668985817500669, "grad_norm": 96.56456756591797, "learning_rate": 9.993310141824994e-07, "loss": 0.6059, "step": 5 }, { "epoch": 0.0008027829810008028, "grad_norm": 66.32806396484375, "learning_rate": 9.991972170189992e-07, "loss": 0.3555, "step": 6 }, { "epoch": 0.0009365801445009366, "grad_norm": 206.52401733398438, "learning_rate": 9.990634198554991e-07, "loss": 0.874, "step": 7 }, { "epoch": 0.0010703773080010704, "grad_norm": 87.55257415771484, "learning_rate": 9.989296226919988e-07, "loss": 0.4952, "step": 8 }, { "epoch": 0.001204174471501204, "grad_norm": 57.25172805786133, "learning_rate": 9.987958255284987e-07, "loss": 0.594, "step": 9 }, { "epoch": 0.001337971635001338, "grad_norm": 161.78453063964844, "learning_rate": 9.986620283649985e-07, "loss": 0.602, "step": 10 }, { "epoch": 0.0014717687985014718, "grad_norm": 26.5186710357666, "learning_rate": 9.985282312014984e-07, "loss": 0.538, "step": 11 }, { "epoch": 0.0016055659620016055, "grad_norm": 112.02456665039062, "learning_rate": 9.983944340379983e-07, "loss": 0.5882, "step": 12 }, { "epoch": 0.0017393631255017393, "grad_norm": 37.167030334472656, "learning_rate": 9.982606368744982e-07, "loss": 0.6333, "step": 13 }, { "epoch": 0.0018731602890018732, "grad_norm": 55.95832824707031, "learning_rate": 9.98126839710998e-07, "loss": 0.6163, "step": 14 }, { "epoch": 0.002006957452502007, "grad_norm": 52.30416488647461, "learning_rate": 9.97993042547498e-07, "loss": 0.5742, "step": 15 }, { "epoch": 0.0021407546160021407, "grad_norm": 43.40435028076172, "learning_rate": 9.978592453839978e-07, "loss": 0.5529, "step": 16 }, { "epoch": 0.0022745517795022745, "grad_norm": 34.061317443847656, "learning_rate": 9.977254482204977e-07, "loss": 0.5573, "step": 17 }, { "epoch": 0.002408348943002408, "grad_norm": 21.99197769165039, "learning_rate": 9.975916510569976e-07, "loss": 0.5308, "step": 18 }, { "epoch": 0.002542146106502542, "grad_norm": 99.7840805053711, "learning_rate": 9.974578538934974e-07, "loss": 0.9071, "step": 19 }, { "epoch": 0.002675943270002676, "grad_norm": 26.018207550048828, "learning_rate": 9.973240567299973e-07, "loss": 0.4285, "step": 20 }, { "epoch": 0.00280974043350281, "grad_norm": 73.99343872070312, "learning_rate": 9.971902595664972e-07, "loss": 0.6916, "step": 21 }, { "epoch": 0.0029435375970029436, "grad_norm": 62.607662200927734, "learning_rate": 9.97056462402997e-07, "loss": 0.685, "step": 22 }, { "epoch": 0.0030773347605030773, "grad_norm": 33.15293502807617, "learning_rate": 9.96922665239497e-07, "loss": 0.5709, "step": 23 }, { "epoch": 0.003211131924003211, "grad_norm": 16.63718605041504, "learning_rate": 9.967888680759966e-07, "loss": 0.4943, "step": 24 }, { "epoch": 0.003344929087503345, "grad_norm": 40.4935302734375, "learning_rate": 9.966550709124965e-07, "loss": 0.4227, "step": 25 }, { "epoch": 0.0034787262510034785, "grad_norm": 11.906813621520996, "learning_rate": 9.965212737489966e-07, "loss": 0.5615, "step": 26 }, { "epoch": 0.0036125234145036127, "grad_norm": 28.17329216003418, "learning_rate": 9.963874765854965e-07, "loss": 0.5264, "step": 27 }, { "epoch": 0.0037463205780037465, "grad_norm": 23.820280075073242, "learning_rate": 9.962536794219962e-07, "loss": 0.6427, "step": 28 }, { "epoch": 0.00388011774150388, "grad_norm": 31.91474723815918, "learning_rate": 9.96119882258496e-07, "loss": 0.472, "step": 29 }, { "epoch": 0.004013914905004014, "grad_norm": 11.115065574645996, "learning_rate": 9.95986085094996e-07, "loss": 0.5671, "step": 30 }, { "epoch": 0.004147712068504148, "grad_norm": 30.9346981048584, "learning_rate": 9.958522879314958e-07, "loss": 0.6242, "step": 31 }, { "epoch": 0.004281509232004281, "grad_norm": 15.954740524291992, "learning_rate": 9.957184907679957e-07, "loss": 0.5834, "step": 32 }, { "epoch": 0.004415306395504416, "grad_norm": 42.15216064453125, "learning_rate": 9.955846936044955e-07, "loss": 0.3828, "step": 33 }, { "epoch": 0.004549103559004549, "grad_norm": 24.38614273071289, "learning_rate": 9.954508964409954e-07, "loss": 0.637, "step": 34 }, { "epoch": 0.004682900722504683, "grad_norm": 14.429488182067871, "learning_rate": 9.953170992774953e-07, "loss": 0.5384, "step": 35 }, { "epoch": 0.004816697886004816, "grad_norm": 23.431161880493164, "learning_rate": 9.951833021139952e-07, "loss": 0.5875, "step": 36 }, { "epoch": 0.0049504950495049506, "grad_norm": 23.903627395629883, "learning_rate": 9.95049504950495e-07, "loss": 0.5366, "step": 37 }, { "epoch": 0.005084292213005084, "grad_norm": 30.819042205810547, "learning_rate": 9.94915707786995e-07, "loss": 0.426, "step": 38 }, { "epoch": 0.005218089376505218, "grad_norm": 16.94266700744629, "learning_rate": 9.947819106234946e-07, "loss": 0.4456, "step": 39 }, { "epoch": 0.005351886540005352, "grad_norm": 20.177934646606445, "learning_rate": 9.946481134599945e-07, "loss": 0.4356, "step": 40 }, { "epoch": 0.0054856837035054855, "grad_norm": 63.00517272949219, "learning_rate": 9.945143162964946e-07, "loss": 0.6703, "step": 41 }, { "epoch": 0.00561948086700562, "grad_norm": 44.83641815185547, "learning_rate": 9.943805191329945e-07, "loss": 0.5356, "step": 42 }, { "epoch": 0.005753278030505753, "grad_norm": 46.97093200683594, "learning_rate": 9.942467219694941e-07, "loss": 0.5233, "step": 43 }, { "epoch": 0.005887075194005887, "grad_norm": 27.333566665649414, "learning_rate": 9.94112924805994e-07, "loss": 0.5151, "step": 44 }, { "epoch": 0.0060208723575060205, "grad_norm": 38.97282028198242, "learning_rate": 9.939791276424939e-07, "loss": 0.7587, "step": 45 }, { "epoch": 0.006154669521006155, "grad_norm": 38.45254898071289, "learning_rate": 9.938453304789938e-07, "loss": 0.4906, "step": 46 }, { "epoch": 0.006288466684506289, "grad_norm": 23.768251419067383, "learning_rate": 9.937115333154936e-07, "loss": 0.7103, "step": 47 }, { "epoch": 0.006422263848006422, "grad_norm": 55.38822555541992, "learning_rate": 9.935777361519935e-07, "loss": 0.7183, "step": 48 }, { "epoch": 0.006556061011506556, "grad_norm": 46.217628479003906, "learning_rate": 9.934439389884934e-07, "loss": 0.5445, "step": 49 }, { "epoch": 0.00668985817500669, "grad_norm": 37.99263381958008, "learning_rate": 9.933101418249933e-07, "loss": 0.4591, "step": 50 }, { "epoch": 0.006823655338506824, "grad_norm": 72.31514739990234, "learning_rate": 9.931763446614932e-07, "loss": 0.4505, "step": 51 }, { "epoch": 0.006957452502006957, "grad_norm": 18.87731170654297, "learning_rate": 9.93042547497993e-07, "loss": 0.5499, "step": 52 }, { "epoch": 0.007091249665507091, "grad_norm": 25.828163146972656, "learning_rate": 9.92908750334493e-07, "loss": 0.376, "step": 53 }, { "epoch": 0.0072250468290072254, "grad_norm": 32.08184051513672, "learning_rate": 9.927749531709926e-07, "loss": 0.5988, "step": 54 }, { "epoch": 0.007358843992507359, "grad_norm": 28.96098518371582, "learning_rate": 9.926411560074925e-07, "loss": 0.5061, "step": 55 }, { "epoch": 0.007492641156007493, "grad_norm": 28.637956619262695, "learning_rate": 9.925073588439926e-07, "loss": 0.4503, "step": 56 }, { "epoch": 0.007626438319507626, "grad_norm": 64.53285217285156, "learning_rate": 9.923735616804924e-07, "loss": 0.7576, "step": 57 }, { "epoch": 0.00776023548300776, "grad_norm": 75.2042236328125, "learning_rate": 9.92239764516992e-07, "loss": 0.9545, "step": 58 }, { "epoch": 0.007894032646507895, "grad_norm": 52.87149429321289, "learning_rate": 9.92105967353492e-07, "loss": 0.7359, "step": 59 }, { "epoch": 0.008027829810008028, "grad_norm": 28.44051742553711, "learning_rate": 9.919721701899919e-07, "loss": 0.5349, "step": 60 }, { "epoch": 0.008161626973508161, "grad_norm": 15.476859092712402, "learning_rate": 9.918383730264917e-07, "loss": 0.5132, "step": 61 }, { "epoch": 0.008295424137008296, "grad_norm": 51.01866912841797, "learning_rate": 9.917045758629916e-07, "loss": 0.3655, "step": 62 }, { "epoch": 0.00842922130050843, "grad_norm": 45.629920959472656, "learning_rate": 9.915707786994915e-07, "loss": 0.5148, "step": 63 }, { "epoch": 0.008563018464008563, "grad_norm": 20.70054054260254, "learning_rate": 9.914369815359914e-07, "loss": 0.5572, "step": 64 }, { "epoch": 0.008696815627508696, "grad_norm": 50.97990417480469, "learning_rate": 9.913031843724913e-07, "loss": 0.4539, "step": 65 }, { "epoch": 0.008830612791008831, "grad_norm": 56.28893280029297, "learning_rate": 9.911693872089911e-07, "loss": 0.5019, "step": 66 }, { "epoch": 0.008964409954508964, "grad_norm": 50.34807205200195, "learning_rate": 9.91035590045491e-07, "loss": 0.4686, "step": 67 }, { "epoch": 0.009098207118009098, "grad_norm": 29.993785858154297, "learning_rate": 9.90901792881991e-07, "loss": 0.6252, "step": 68 }, { "epoch": 0.009232004281509233, "grad_norm": 19.4514102935791, "learning_rate": 9.907679957184908e-07, "loss": 0.3352, "step": 69 }, { "epoch": 0.009365801445009366, "grad_norm": 22.55575180053711, "learning_rate": 9.906341985549907e-07, "loss": 0.5184, "step": 70 }, { "epoch": 0.0094995986085095, "grad_norm": 36.30824661254883, "learning_rate": 9.905004013914905e-07, "loss": 0.5349, "step": 71 }, { "epoch": 0.009633395772009633, "grad_norm": 16.004980087280273, "learning_rate": 9.903666042279904e-07, "loss": 0.2966, "step": 72 }, { "epoch": 0.009767192935509768, "grad_norm": 20.882448196411133, "learning_rate": 9.902328070644903e-07, "loss": 0.3086, "step": 73 }, { "epoch": 0.009900990099009901, "grad_norm": 26.98880386352539, "learning_rate": 9.9009900990099e-07, "loss": 0.5409, "step": 74 }, { "epoch": 0.010034787262510034, "grad_norm": 61.62352752685547, "learning_rate": 9.899652127374898e-07, "loss": 0.789, "step": 75 }, { "epoch": 0.010168584426010168, "grad_norm": 42.93553161621094, "learning_rate": 9.898314155739897e-07, "loss": 0.5571, "step": 76 }, { "epoch": 0.010302381589510303, "grad_norm": 84.17617797851562, "learning_rate": 9.896976184104896e-07, "loss": 0.9816, "step": 77 }, { "epoch": 0.010436178753010436, "grad_norm": 25.727781295776367, "learning_rate": 9.895638212469895e-07, "loss": 0.5069, "step": 78 }, { "epoch": 0.01056997591651057, "grad_norm": 32.30039596557617, "learning_rate": 9.894300240834894e-07, "loss": 0.566, "step": 79 }, { "epoch": 0.010703773080010704, "grad_norm": 28.826736450195312, "learning_rate": 9.892962269199892e-07, "loss": 0.1641, "step": 80 }, { "epoch": 0.010837570243510838, "grad_norm": 23.311487197875977, "learning_rate": 9.891624297564891e-07, "loss": 0.4055, "step": 81 }, { "epoch": 0.010971367407010971, "grad_norm": 26.798765182495117, "learning_rate": 9.89028632592989e-07, "loss": 0.5606, "step": 82 }, { "epoch": 0.011105164570511104, "grad_norm": 12.162820816040039, "learning_rate": 9.888948354294889e-07, "loss": 0.574, "step": 83 }, { "epoch": 0.01123896173401124, "grad_norm": 21.961275100708008, "learning_rate": 9.887610382659888e-07, "loss": 0.5561, "step": 84 }, { "epoch": 0.011372758897511373, "grad_norm": 23.370407104492188, "learning_rate": 9.886272411024886e-07, "loss": 0.3984, "step": 85 }, { "epoch": 0.011506556061011506, "grad_norm": 15.90936279296875, "learning_rate": 9.884934439389885e-07, "loss": 0.6116, "step": 86 }, { "epoch": 0.011640353224511641, "grad_norm": 21.821765899658203, "learning_rate": 9.883596467754884e-07, "loss": 0.5412, "step": 87 }, { "epoch": 0.011774150388011774, "grad_norm": 11.008634567260742, "learning_rate": 9.882258496119883e-07, "loss": 0.6626, "step": 88 }, { "epoch": 0.011907947551511908, "grad_norm": 15.92811393737793, "learning_rate": 9.88092052448488e-07, "loss": 0.525, "step": 89 }, { "epoch": 0.012041744715012041, "grad_norm": 20.10358428955078, "learning_rate": 9.879582552849878e-07, "loss": 0.6513, "step": 90 }, { "epoch": 0.012175541878512176, "grad_norm": 16.40944480895996, "learning_rate": 9.878244581214877e-07, "loss": 0.6041, "step": 91 }, { "epoch": 0.01230933904201231, "grad_norm": 10.530786514282227, "learning_rate": 9.876906609579878e-07, "loss": 0.523, "step": 92 }, { "epoch": 0.012443136205512443, "grad_norm": 36.8993034362793, "learning_rate": 9.875568637944875e-07, "loss": 0.4599, "step": 93 }, { "epoch": 0.012576933369012578, "grad_norm": 15.143503189086914, "learning_rate": 9.874230666309873e-07, "loss": 0.5894, "step": 94 }, { "epoch": 0.012710730532512711, "grad_norm": 58.1686897277832, "learning_rate": 9.872892694674872e-07, "loss": 0.7202, "step": 95 }, { "epoch": 0.012844527696012844, "grad_norm": 21.70572853088379, "learning_rate": 9.87155472303987e-07, "loss": 0.364, "step": 96 }, { "epoch": 0.012978324859512978, "grad_norm": 39.490699768066406, "learning_rate": 9.87021675140487e-07, "loss": 0.6808, "step": 97 }, { "epoch": 0.013112122023013113, "grad_norm": 17.027238845825195, "learning_rate": 9.868878779769869e-07, "loss": 0.5427, "step": 98 }, { "epoch": 0.013245919186513246, "grad_norm": 7.477230548858643, "learning_rate": 9.867540808134867e-07, "loss": 0.4726, "step": 99 }, { "epoch": 0.01337971635001338, "grad_norm": 20.803356170654297, "learning_rate": 9.866202836499866e-07, "loss": 0.5258, "step": 100 }, { "epoch": 0.013513513513513514, "grad_norm": 41.74342346191406, "learning_rate": 9.864864864864865e-07, "loss": 0.6083, "step": 101 }, { "epoch": 0.013647310677013648, "grad_norm": 17.93121910095215, "learning_rate": 9.863526893229864e-07, "loss": 0.4261, "step": 102 }, { "epoch": 0.013781107840513781, "grad_norm": 16.341951370239258, "learning_rate": 9.862188921594862e-07, "loss": 0.548, "step": 103 }, { "epoch": 0.013914905004013914, "grad_norm": 15.236637115478516, "learning_rate": 9.86085094995986e-07, "loss": 0.5264, "step": 104 }, { "epoch": 0.01404870216751405, "grad_norm": 29.76473617553711, "learning_rate": 9.859512978324858e-07, "loss": 0.5176, "step": 105 }, { "epoch": 0.014182499331014183, "grad_norm": 17.293731689453125, "learning_rate": 9.858175006689857e-07, "loss": 0.4871, "step": 106 }, { "epoch": 0.014316296494514316, "grad_norm": 19.846126556396484, "learning_rate": 9.856837035054858e-07, "loss": 0.4828, "step": 107 }, { "epoch": 0.014450093658014451, "grad_norm": 32.264102935791016, "learning_rate": 9.855499063419854e-07, "loss": 0.6576, "step": 108 }, { "epoch": 0.014583890821514584, "grad_norm": 36.08279037475586, "learning_rate": 9.854161091784853e-07, "loss": 0.5862, "step": 109 }, { "epoch": 0.014717687985014717, "grad_norm": 15.164156913757324, "learning_rate": 9.852823120149852e-07, "loss": 0.5147, "step": 110 }, { "epoch": 0.01485148514851485, "grad_norm": 44.845733642578125, "learning_rate": 9.85148514851485e-07, "loss": 0.696, "step": 111 }, { "epoch": 0.014985282312014986, "grad_norm": 16.587377548217773, "learning_rate": 9.85014717687985e-07, "loss": 0.523, "step": 112 }, { "epoch": 0.01511907947551512, "grad_norm": 28.191667556762695, "learning_rate": 9.848809205244848e-07, "loss": 0.343, "step": 113 }, { "epoch": 0.015252876639015252, "grad_norm": 18.207569122314453, "learning_rate": 9.847471233609847e-07, "loss": 0.5568, "step": 114 }, { "epoch": 0.015386673802515387, "grad_norm": 12.054566383361816, "learning_rate": 9.846133261974846e-07, "loss": 0.4707, "step": 115 }, { "epoch": 0.01552047096601552, "grad_norm": 14.004857063293457, "learning_rate": 9.844795290339845e-07, "loss": 0.701, "step": 116 }, { "epoch": 0.015654268129515654, "grad_norm": 45.63251876831055, "learning_rate": 9.843457318704843e-07, "loss": 0.4949, "step": 117 }, { "epoch": 0.01578806529301579, "grad_norm": 39.01028060913086, "learning_rate": 9.842119347069842e-07, "loss": 0.634, "step": 118 }, { "epoch": 0.01592186245651592, "grad_norm": 35.620628356933594, "learning_rate": 9.84078137543484e-07, "loss": 0.4249, "step": 119 }, { "epoch": 0.016055659620016056, "grad_norm": 14.283676147460938, "learning_rate": 9.839443403799838e-07, "loss": 0.5107, "step": 120 }, { "epoch": 0.01618945678351619, "grad_norm": 29.358863830566406, "learning_rate": 9.838105432164837e-07, "loss": 0.6483, "step": 121 }, { "epoch": 0.016323253947016322, "grad_norm": 31.07655143737793, "learning_rate": 9.836767460529837e-07, "loss": 0.6284, "step": 122 }, { "epoch": 0.016457051110516457, "grad_norm": 31.927175521850586, "learning_rate": 9.835429488894836e-07, "loss": 0.3563, "step": 123 }, { "epoch": 0.016590848274016592, "grad_norm": 22.668601989746094, "learning_rate": 9.834091517259833e-07, "loss": 0.4969, "step": 124 }, { "epoch": 0.016724645437516724, "grad_norm": 36.73610305786133, "learning_rate": 9.832753545624832e-07, "loss": 0.7297, "step": 125 }, { "epoch": 0.01685844260101686, "grad_norm": 22.381183624267578, "learning_rate": 9.83141557398983e-07, "loss": 0.5619, "step": 126 }, { "epoch": 0.01699223976451699, "grad_norm": 24.720203399658203, "learning_rate": 9.83007760235483e-07, "loss": 0.5357, "step": 127 }, { "epoch": 0.017126036928017126, "grad_norm": 41.036224365234375, "learning_rate": 9.828739630719828e-07, "loss": 0.6461, "step": 128 }, { "epoch": 0.01725983409151726, "grad_norm": 38.702857971191406, "learning_rate": 9.827401659084827e-07, "loss": 0.4043, "step": 129 }, { "epoch": 0.017393631255017392, "grad_norm": 16.72785186767578, "learning_rate": 9.826063687449826e-07, "loss": 0.6708, "step": 130 }, { "epoch": 0.017527428418517527, "grad_norm": 34.862831115722656, "learning_rate": 9.824725715814824e-07, "loss": 0.4887, "step": 131 }, { "epoch": 0.017661225582017662, "grad_norm": 19.050689697265625, "learning_rate": 9.823387744179823e-07, "loss": 0.4868, "step": 132 }, { "epoch": 0.017795022745517794, "grad_norm": 12.097559928894043, "learning_rate": 9.822049772544822e-07, "loss": 0.463, "step": 133 }, { "epoch": 0.01792881990901793, "grad_norm": 18.71462059020996, "learning_rate": 9.82071180090982e-07, "loss": 0.4137, "step": 134 }, { "epoch": 0.018062617072518064, "grad_norm": 42.17513656616211, "learning_rate": 9.819373829274818e-07, "loss": 0.6429, "step": 135 }, { "epoch": 0.018196414236018196, "grad_norm": 13.48039436340332, "learning_rate": 9.818035857639818e-07, "loss": 0.4156, "step": 136 }, { "epoch": 0.01833021139951833, "grad_norm": 49.489158630371094, "learning_rate": 9.816697886004817e-07, "loss": 0.7119, "step": 137 }, { "epoch": 0.018464008563018466, "grad_norm": 22.816030502319336, "learning_rate": 9.815359914369816e-07, "loss": 0.4514, "step": 138 }, { "epoch": 0.018597805726518597, "grad_norm": 10.031615257263184, "learning_rate": 9.814021942734813e-07, "loss": 0.399, "step": 139 }, { "epoch": 0.018731602890018732, "grad_norm": 27.976482391357422, "learning_rate": 9.812683971099811e-07, "loss": 0.5454, "step": 140 }, { "epoch": 0.018865400053518864, "grad_norm": 19.82789421081543, "learning_rate": 9.81134599946481e-07, "loss": 0.5387, "step": 141 }, { "epoch": 0.018999197217019, "grad_norm": 19.963842391967773, "learning_rate": 9.81000802782981e-07, "loss": 0.4297, "step": 142 }, { "epoch": 0.019132994380519134, "grad_norm": 12.969765663146973, "learning_rate": 9.808670056194808e-07, "loss": 0.5717, "step": 143 }, { "epoch": 0.019266791544019266, "grad_norm": 50.279972076416016, "learning_rate": 9.807332084559807e-07, "loss": 0.4063, "step": 144 }, { "epoch": 0.0194005887075194, "grad_norm": 17.025346755981445, "learning_rate": 9.805994112924805e-07, "loss": 0.489, "step": 145 }, { "epoch": 0.019534385871019536, "grad_norm": 24.924968719482422, "learning_rate": 9.804656141289804e-07, "loss": 0.4458, "step": 146 }, { "epoch": 0.019668183034519667, "grad_norm": 35.7073860168457, "learning_rate": 9.803318169654803e-07, "loss": 0.6295, "step": 147 }, { "epoch": 0.019801980198019802, "grad_norm": 34.52640151977539, "learning_rate": 9.801980198019802e-07, "loss": 0.4543, "step": 148 }, { "epoch": 0.019935777361519937, "grad_norm": 43.62965774536133, "learning_rate": 9.8006422263848e-07, "loss": 0.6497, "step": 149 }, { "epoch": 0.02006957452502007, "grad_norm": 8.462423324584961, "learning_rate": 9.7993042547498e-07, "loss": 0.3796, "step": 150 }, { "epoch": 0.020203371688520204, "grad_norm": 16.452775955200195, "learning_rate": 9.797966283114798e-07, "loss": 0.4846, "step": 151 }, { "epoch": 0.020337168852020335, "grad_norm": 17.432998657226562, "learning_rate": 9.796628311479797e-07, "loss": 0.5127, "step": 152 }, { "epoch": 0.02047096601552047, "grad_norm": 26.148113250732422, "learning_rate": 9.795290339844796e-07, "loss": 0.5303, "step": 153 }, { "epoch": 0.020604763179020606, "grad_norm": 14.63249683380127, "learning_rate": 9.793952368209795e-07, "loss": 0.5467, "step": 154 }, { "epoch": 0.020738560342520737, "grad_norm": 32.48606491088867, "learning_rate": 9.792614396574791e-07, "loss": 0.6154, "step": 155 }, { "epoch": 0.020872357506020872, "grad_norm": 36.01746368408203, "learning_rate": 9.79127642493979e-07, "loss": 0.3737, "step": 156 }, { "epoch": 0.021006154669521007, "grad_norm": 39.24868392944336, "learning_rate": 9.789938453304789e-07, "loss": 0.4689, "step": 157 }, { "epoch": 0.02113995183302114, "grad_norm": 15.68065357208252, "learning_rate": 9.78860048166979e-07, "loss": 0.4462, "step": 158 }, { "epoch": 0.021273748996521274, "grad_norm": 40.858089447021484, "learning_rate": 9.787262510034786e-07, "loss": 0.3191, "step": 159 }, { "epoch": 0.02140754616002141, "grad_norm": 25.862213134765625, "learning_rate": 9.785924538399785e-07, "loss": 0.4464, "step": 160 }, { "epoch": 0.02154134332352154, "grad_norm": 8.73212718963623, "learning_rate": 9.784586566764784e-07, "loss": 0.4226, "step": 161 }, { "epoch": 0.021675140487021675, "grad_norm": 30.69873046875, "learning_rate": 9.783248595129783e-07, "loss": 0.5283, "step": 162 }, { "epoch": 0.02180893765052181, "grad_norm": 24.516950607299805, "learning_rate": 9.781910623494782e-07, "loss": 0.6922, "step": 163 }, { "epoch": 0.021942734814021942, "grad_norm": 43.645503997802734, "learning_rate": 9.78057265185978e-07, "loss": 0.7168, "step": 164 }, { "epoch": 0.022076531977522077, "grad_norm": 7.280452251434326, "learning_rate": 9.77923468022478e-07, "loss": 0.3862, "step": 165 }, { "epoch": 0.02221032914102221, "grad_norm": 20.300607681274414, "learning_rate": 9.777896708589778e-07, "loss": 0.3353, "step": 166 }, { "epoch": 0.022344126304522344, "grad_norm": 13.733163833618164, "learning_rate": 9.776558736954777e-07, "loss": 0.3542, "step": 167 }, { "epoch": 0.02247792346802248, "grad_norm": 58.210411071777344, "learning_rate": 9.775220765319776e-07, "loss": 0.7801, "step": 168 }, { "epoch": 0.02261172063152261, "grad_norm": 31.140893936157227, "learning_rate": 9.773882793684774e-07, "loss": 0.5741, "step": 169 }, { "epoch": 0.022745517795022745, "grad_norm": 27.565919876098633, "learning_rate": 9.77254482204977e-07, "loss": 0.377, "step": 170 }, { "epoch": 0.02287931495852288, "grad_norm": 56.492862701416016, "learning_rate": 9.77120685041477e-07, "loss": 0.4705, "step": 171 }, { "epoch": 0.023013112122023012, "grad_norm": 34.349639892578125, "learning_rate": 9.769868878779769e-07, "loss": 0.2813, "step": 172 }, { "epoch": 0.023146909285523147, "grad_norm": 20.693248748779297, "learning_rate": 9.76853090714477e-07, "loss": 0.3189, "step": 173 }, { "epoch": 0.023280706449023282, "grad_norm": 30.47025489807129, "learning_rate": 9.767192935509766e-07, "loss": 0.4135, "step": 174 }, { "epoch": 0.023414503612523414, "grad_norm": 24.515195846557617, "learning_rate": 9.765854963874765e-07, "loss": 0.6402, "step": 175 }, { "epoch": 0.02354830077602355, "grad_norm": 16.61606788635254, "learning_rate": 9.764516992239764e-07, "loss": 0.2809, "step": 176 }, { "epoch": 0.023682097939523684, "grad_norm": 45.72105026245117, "learning_rate": 9.763179020604763e-07, "loss": 0.5968, "step": 177 }, { "epoch": 0.023815895103023815, "grad_norm": 15.793168067932129, "learning_rate": 9.761841048969761e-07, "loss": 0.4389, "step": 178 }, { "epoch": 0.02394969226652395, "grad_norm": 34.86378860473633, "learning_rate": 9.76050307733476e-07, "loss": 0.7963, "step": 179 }, { "epoch": 0.024083489430024082, "grad_norm": 28.19921875, "learning_rate": 9.759165105699759e-07, "loss": 0.5236, "step": 180 }, { "epoch": 0.024217286593524217, "grad_norm": 56.83399963378906, "learning_rate": 9.757827134064758e-07, "loss": 0.7808, "step": 181 }, { "epoch": 0.024351083757024352, "grad_norm": 15.20799446105957, "learning_rate": 9.756489162429757e-07, "loss": 0.4978, "step": 182 }, { "epoch": 0.024484880920524484, "grad_norm": 32.27178192138672, "learning_rate": 9.755151190794755e-07, "loss": 0.4894, "step": 183 }, { "epoch": 0.02461867808402462, "grad_norm": 12.587854385375977, "learning_rate": 9.753813219159754e-07, "loss": 0.4479, "step": 184 }, { "epoch": 0.024752475247524754, "grad_norm": 16.971675872802734, "learning_rate": 9.75247524752475e-07, "loss": 0.433, "step": 185 }, { "epoch": 0.024886272411024885, "grad_norm": 24.3765869140625, "learning_rate": 9.75113727588975e-07, "loss": 0.3115, "step": 186 }, { "epoch": 0.02502006957452502, "grad_norm": 8.886162757873535, "learning_rate": 9.749799304254748e-07, "loss": 0.5717, "step": 187 }, { "epoch": 0.025153866738025155, "grad_norm": 33.35957717895508, "learning_rate": 9.74846133261975e-07, "loss": 0.5927, "step": 188 }, { "epoch": 0.025287663901525287, "grad_norm": 15.663122177124023, "learning_rate": 9.747123360984746e-07, "loss": 0.4545, "step": 189 }, { "epoch": 0.025421461065025422, "grad_norm": 18.5145263671875, "learning_rate": 9.745785389349745e-07, "loss": 0.5419, "step": 190 }, { "epoch": 0.025555258228525557, "grad_norm": 13.059778213500977, "learning_rate": 9.744447417714744e-07, "loss": 0.436, "step": 191 }, { "epoch": 0.02568905539202569, "grad_norm": 19.93691062927246, "learning_rate": 9.743109446079742e-07, "loss": 0.5758, "step": 192 }, { "epoch": 0.025822852555525824, "grad_norm": 34.530521392822266, "learning_rate": 9.741771474444741e-07, "loss": 0.4169, "step": 193 }, { "epoch": 0.025956649719025955, "grad_norm": 16.583072662353516, "learning_rate": 9.74043350280974e-07, "loss": 0.635, "step": 194 }, { "epoch": 0.02609044688252609, "grad_norm": 14.868497848510742, "learning_rate": 9.739095531174739e-07, "loss": 0.6339, "step": 195 }, { "epoch": 0.026224244046026225, "grad_norm": 18.945556640625, "learning_rate": 9.737757559539738e-07, "loss": 0.4795, "step": 196 }, { "epoch": 0.026358041209526357, "grad_norm": 19.149673461914062, "learning_rate": 9.736419587904736e-07, "loss": 0.5699, "step": 197 }, { "epoch": 0.026491838373026492, "grad_norm": 14.66524887084961, "learning_rate": 9.735081616269735e-07, "loss": 0.5446, "step": 198 }, { "epoch": 0.026625635536526627, "grad_norm": 28.46867561340332, "learning_rate": 9.733743644634734e-07, "loss": 0.5078, "step": 199 }, { "epoch": 0.02675943270002676, "grad_norm": 11.86974811553955, "learning_rate": 9.732405672999733e-07, "loss": 0.6289, "step": 200 }, { "epoch": 0.026893229863526893, "grad_norm": 12.342607498168945, "learning_rate": 9.73106770136473e-07, "loss": 0.3837, "step": 201 }, { "epoch": 0.02702702702702703, "grad_norm": 20.00912857055664, "learning_rate": 9.72972972972973e-07, "loss": 0.548, "step": 202 }, { "epoch": 0.02716082419052716, "grad_norm": 15.112037658691406, "learning_rate": 9.72839175809473e-07, "loss": 0.4774, "step": 203 }, { "epoch": 0.027294621354027295, "grad_norm": 24.426694869995117, "learning_rate": 9.727053786459728e-07, "loss": 0.6912, "step": 204 }, { "epoch": 0.027428418517527427, "grad_norm": 8.702140808105469, "learning_rate": 9.725715814824725e-07, "loss": 0.499, "step": 205 }, { "epoch": 0.027562215681027562, "grad_norm": 15.419366836547852, "learning_rate": 9.724377843189723e-07, "loss": 0.5786, "step": 206 }, { "epoch": 0.027696012844527697, "grad_norm": 36.53022003173828, "learning_rate": 9.723039871554722e-07, "loss": 0.5822, "step": 207 }, { "epoch": 0.02782981000802783, "grad_norm": 21.43185806274414, "learning_rate": 9.72170189991972e-07, "loss": 0.4623, "step": 208 }, { "epoch": 0.027963607171527963, "grad_norm": 29.665130615234375, "learning_rate": 9.72036392828472e-07, "loss": 0.4375, "step": 209 }, { "epoch": 0.0280974043350281, "grad_norm": 10.770829200744629, "learning_rate": 9.719025956649718e-07, "loss": 0.5295, "step": 210 }, { "epoch": 0.02823120149852823, "grad_norm": 8.270173072814941, "learning_rate": 9.717687985014717e-07, "loss": 0.5407, "step": 211 }, { "epoch": 0.028364998662028365, "grad_norm": 14.50229263305664, "learning_rate": 9.716350013379716e-07, "loss": 0.4846, "step": 212 }, { "epoch": 0.0284987958255285, "grad_norm": 7.3779449462890625, "learning_rate": 9.715012041744715e-07, "loss": 0.6004, "step": 213 }, { "epoch": 0.02863259298902863, "grad_norm": 24.017763137817383, "learning_rate": 9.713674070109714e-07, "loss": 0.3523, "step": 214 }, { "epoch": 0.028766390152528767, "grad_norm": 14.100053787231445, "learning_rate": 9.712336098474712e-07, "loss": 0.5918, "step": 215 }, { "epoch": 0.028900187316028902, "grad_norm": 35.09121322631836, "learning_rate": 9.71099812683971e-07, "loss": 0.4971, "step": 216 }, { "epoch": 0.029033984479529033, "grad_norm": 10.571396827697754, "learning_rate": 9.70966015520471e-07, "loss": 0.4975, "step": 217 }, { "epoch": 0.02916778164302917, "grad_norm": 7.85433292388916, "learning_rate": 9.708322183569709e-07, "loss": 0.4541, "step": 218 }, { "epoch": 0.0293015788065293, "grad_norm": 11.281142234802246, "learning_rate": 9.706984211934708e-07, "loss": 0.5418, "step": 219 }, { "epoch": 0.029435375970029435, "grad_norm": 19.933786392211914, "learning_rate": 9.705646240299704e-07, "loss": 0.3122, "step": 220 }, { "epoch": 0.02956917313352957, "grad_norm": 72.26953887939453, "learning_rate": 9.704308268664703e-07, "loss": 1.0308, "step": 221 }, { "epoch": 0.0297029702970297, "grad_norm": 8.94245433807373, "learning_rate": 9.702970297029702e-07, "loss": 0.3681, "step": 222 }, { "epoch": 0.029836767460529837, "grad_norm": 35.85159683227539, "learning_rate": 9.7016323253947e-07, "loss": 0.6327, "step": 223 }, { "epoch": 0.02997056462402997, "grad_norm": 28.939044952392578, "learning_rate": 9.7002943537597e-07, "loss": 0.5749, "step": 224 }, { "epoch": 0.030104361787530103, "grad_norm": 28.97385025024414, "learning_rate": 9.698956382124698e-07, "loss": 0.5232, "step": 225 }, { "epoch": 0.03023815895103024, "grad_norm": 29.839492797851562, "learning_rate": 9.697618410489697e-07, "loss": 0.5481, "step": 226 }, { "epoch": 0.030371956114530373, "grad_norm": 16.055540084838867, "learning_rate": 9.696280438854696e-07, "loss": 0.3972, "step": 227 }, { "epoch": 0.030505753278030505, "grad_norm": 9.32122802734375, "learning_rate": 9.694942467219695e-07, "loss": 0.329, "step": 228 }, { "epoch": 0.03063955044153064, "grad_norm": 10.133707046508789, "learning_rate": 9.693604495584693e-07, "loss": 0.4168, "step": 229 }, { "epoch": 0.030773347605030775, "grad_norm": 10.335691452026367, "learning_rate": 9.692266523949692e-07, "loss": 0.4222, "step": 230 }, { "epoch": 0.030907144768530907, "grad_norm": 45.927276611328125, "learning_rate": 9.69092855231469e-07, "loss": 0.5896, "step": 231 }, { "epoch": 0.03104094193203104, "grad_norm": 21.543195724487305, "learning_rate": 9.68959058067969e-07, "loss": 0.6347, "step": 232 }, { "epoch": 0.031174739095531173, "grad_norm": 36.20325469970703, "learning_rate": 9.688252609044689e-07, "loss": 0.336, "step": 233 }, { "epoch": 0.03130853625903131, "grad_norm": 39.510475158691406, "learning_rate": 9.686914637409687e-07, "loss": 0.5378, "step": 234 }, { "epoch": 0.03144233342253144, "grad_norm": 41.30943298339844, "learning_rate": 9.685576665774686e-07, "loss": 0.4866, "step": 235 }, { "epoch": 0.03157613058603158, "grad_norm": 12.817180633544922, "learning_rate": 9.684238694139683e-07, "loss": 0.4966, "step": 236 }, { "epoch": 0.03170992774953171, "grad_norm": 20.02503204345703, "learning_rate": 9.682900722504682e-07, "loss": 0.7379, "step": 237 }, { "epoch": 0.03184372491303184, "grad_norm": 19.8880558013916, "learning_rate": 9.68156275086968e-07, "loss": 0.5996, "step": 238 }, { "epoch": 0.031977522076531976, "grad_norm": 39.08367156982422, "learning_rate": 9.68022477923468e-07, "loss": 0.4225, "step": 239 }, { "epoch": 0.03211131924003211, "grad_norm": 32.905208587646484, "learning_rate": 9.678886807599678e-07, "loss": 0.5545, "step": 240 }, { "epoch": 0.03224511640353225, "grad_norm": 24.81601905822754, "learning_rate": 9.677548835964677e-07, "loss": 0.6167, "step": 241 }, { "epoch": 0.03237891356703238, "grad_norm": 19.0799560546875, "learning_rate": 9.676210864329676e-07, "loss": 0.4641, "step": 242 }, { "epoch": 0.03251271073053251, "grad_norm": 16.424758911132812, "learning_rate": 9.674872892694674e-07, "loss": 0.393, "step": 243 }, { "epoch": 0.032646507894032645, "grad_norm": 19.4969425201416, "learning_rate": 9.673534921059673e-07, "loss": 0.3819, "step": 244 }, { "epoch": 0.03278030505753278, "grad_norm": 26.53505516052246, "learning_rate": 9.672196949424672e-07, "loss": 0.6176, "step": 245 }, { "epoch": 0.032914102221032915, "grad_norm": 16.946748733520508, "learning_rate": 9.67085897778967e-07, "loss": 0.4065, "step": 246 }, { "epoch": 0.03304789938453305, "grad_norm": 10.157712936401367, "learning_rate": 9.66952100615467e-07, "loss": 0.4209, "step": 247 }, { "epoch": 0.033181696548033185, "grad_norm": 45.594139099121094, "learning_rate": 9.668183034519668e-07, "loss": 0.7558, "step": 248 }, { "epoch": 0.03331549371153331, "grad_norm": 13.750262260437012, "learning_rate": 9.666845062884667e-07, "loss": 0.5397, "step": 249 }, { "epoch": 0.03344929087503345, "grad_norm": 11.61208724975586, "learning_rate": 9.665507091249666e-07, "loss": 0.5743, "step": 250 }, { "epoch": 0.03358308803853358, "grad_norm": 38.98933029174805, "learning_rate": 9.664169119614663e-07, "loss": 0.4703, "step": 251 }, { "epoch": 0.03371688520203372, "grad_norm": 9.744982719421387, "learning_rate": 9.662831147979661e-07, "loss": 0.6113, "step": 252 }, { "epoch": 0.03385068236553385, "grad_norm": 17.790103912353516, "learning_rate": 9.66149317634466e-07, "loss": 0.5646, "step": 253 }, { "epoch": 0.03398447952903398, "grad_norm": 13.180794715881348, "learning_rate": 9.660155204709661e-07, "loss": 0.539, "step": 254 }, { "epoch": 0.034118276692534116, "grad_norm": 6.347810745239258, "learning_rate": 9.658817233074658e-07, "loss": 0.5939, "step": 255 }, { "epoch": 0.03425207385603425, "grad_norm": 9.106517791748047, "learning_rate": 9.657479261439657e-07, "loss": 0.4909, "step": 256 }, { "epoch": 0.034385871019534386, "grad_norm": 14.422121047973633, "learning_rate": 9.656141289804655e-07, "loss": 0.6318, "step": 257 }, { "epoch": 0.03451966818303452, "grad_norm": 28.771238327026367, "learning_rate": 9.654803318169654e-07, "loss": 0.417, "step": 258 }, { "epoch": 0.034653465346534656, "grad_norm": 18.286718368530273, "learning_rate": 9.653465346534653e-07, "loss": 0.4041, "step": 259 }, { "epoch": 0.034787262510034785, "grad_norm": 22.72861099243164, "learning_rate": 9.652127374899652e-07, "loss": 0.4499, "step": 260 }, { "epoch": 0.03492105967353492, "grad_norm": 10.565143585205078, "learning_rate": 9.65078940326465e-07, "loss": 0.4721, "step": 261 }, { "epoch": 0.035054856837035055, "grad_norm": 8.393845558166504, "learning_rate": 9.64945143162965e-07, "loss": 0.373, "step": 262 }, { "epoch": 0.03518865400053519, "grad_norm": 23.982633590698242, "learning_rate": 9.648113459994648e-07, "loss": 0.4717, "step": 263 }, { "epoch": 0.035322451164035325, "grad_norm": 33.5474967956543, "learning_rate": 9.646775488359647e-07, "loss": 0.6557, "step": 264 }, { "epoch": 0.03545624832753545, "grad_norm": 15.954710960388184, "learning_rate": 9.645437516724646e-07, "loss": 0.4898, "step": 265 }, { "epoch": 0.03559004549103559, "grad_norm": 15.541691780090332, "learning_rate": 9.644099545089642e-07, "loss": 0.4008, "step": 266 }, { "epoch": 0.03572384265453572, "grad_norm": 13.092925071716309, "learning_rate": 9.642761573454641e-07, "loss": 0.3705, "step": 267 }, { "epoch": 0.03585763981803586, "grad_norm": 15.496597290039062, "learning_rate": 9.641423601819642e-07, "loss": 0.673, "step": 268 }, { "epoch": 0.03599143698153599, "grad_norm": 13.220087051391602, "learning_rate": 9.64008563018464e-07, "loss": 0.3477, "step": 269 }, { "epoch": 0.03612523414503613, "grad_norm": 26.300270080566406, "learning_rate": 9.638747658549638e-07, "loss": 0.5481, "step": 270 }, { "epoch": 0.036259031308536256, "grad_norm": 16.569528579711914, "learning_rate": 9.637409686914636e-07, "loss": 0.6605, "step": 271 }, { "epoch": 0.03639282847203639, "grad_norm": 20.86603355407715, "learning_rate": 9.636071715279635e-07, "loss": 0.5627, "step": 272 }, { "epoch": 0.036526625635536526, "grad_norm": 27.723039627075195, "learning_rate": 9.634733743644634e-07, "loss": 0.4518, "step": 273 }, { "epoch": 0.03666042279903666, "grad_norm": 29.385784149169922, "learning_rate": 9.633395772009633e-07, "loss": 0.3977, "step": 274 }, { "epoch": 0.036794219962536796, "grad_norm": 23.307392120361328, "learning_rate": 9.632057800374632e-07, "loss": 0.5273, "step": 275 }, { "epoch": 0.03692801712603693, "grad_norm": 11.6332426071167, "learning_rate": 9.63071982873963e-07, "loss": 0.3704, "step": 276 }, { "epoch": 0.03706181428953706, "grad_norm": 11.539573669433594, "learning_rate": 9.62938185710463e-07, "loss": 0.2038, "step": 277 }, { "epoch": 0.037195611453037195, "grad_norm": 25.62273406982422, "learning_rate": 9.628043885469628e-07, "loss": 0.4663, "step": 278 }, { "epoch": 0.03732940861653733, "grad_norm": 59.18559265136719, "learning_rate": 9.626705913834627e-07, "loss": 0.6647, "step": 279 }, { "epoch": 0.037463205780037465, "grad_norm": 45.464332580566406, "learning_rate": 9.625367942199626e-07, "loss": 0.6322, "step": 280 }, { "epoch": 0.0375970029435376, "grad_norm": 58.86723709106445, "learning_rate": 9.624029970564624e-07, "loss": 0.741, "step": 281 }, { "epoch": 0.03773080010703773, "grad_norm": 23.75674819946289, "learning_rate": 9.62269199892962e-07, "loss": 0.7591, "step": 282 }, { "epoch": 0.03786459727053786, "grad_norm": 10.74997615814209, "learning_rate": 9.621354027294622e-07, "loss": 0.485, "step": 283 }, { "epoch": 0.037998394434038, "grad_norm": 35.385536193847656, "learning_rate": 9.62001605565962e-07, "loss": 0.577, "step": 284 }, { "epoch": 0.03813219159753813, "grad_norm": 24.03150177001953, "learning_rate": 9.61867808402462e-07, "loss": 0.5363, "step": 285 }, { "epoch": 0.03826598876103827, "grad_norm": 20.062177658081055, "learning_rate": 9.617340112389616e-07, "loss": 0.5289, "step": 286 }, { "epoch": 0.0383997859245384, "grad_norm": 33.248291015625, "learning_rate": 9.616002140754615e-07, "loss": 0.4148, "step": 287 }, { "epoch": 0.03853358308803853, "grad_norm": 11.273683547973633, "learning_rate": 9.614664169119614e-07, "loss": 0.4744, "step": 288 }, { "epoch": 0.038667380251538666, "grad_norm": 49.48904037475586, "learning_rate": 9.613326197484613e-07, "loss": 0.5273, "step": 289 }, { "epoch": 0.0388011774150388, "grad_norm": 27.68235969543457, "learning_rate": 9.611988225849611e-07, "loss": 0.4678, "step": 290 }, { "epoch": 0.038934974578538936, "grad_norm": 46.00740432739258, "learning_rate": 9.61065025421461e-07, "loss": 0.4031, "step": 291 }, { "epoch": 0.03906877174203907, "grad_norm": 12.648746490478516, "learning_rate": 9.609312282579609e-07, "loss": 0.6353, "step": 292 }, { "epoch": 0.0392025689055392, "grad_norm": 17.63758087158203, "learning_rate": 9.607974310944608e-07, "loss": 0.6004, "step": 293 }, { "epoch": 0.039336366069039334, "grad_norm": 14.645577430725098, "learning_rate": 9.606636339309606e-07, "loss": 0.3961, "step": 294 }, { "epoch": 0.03947016323253947, "grad_norm": 13.37092113494873, "learning_rate": 9.605298367674605e-07, "loss": 0.4835, "step": 295 }, { "epoch": 0.039603960396039604, "grad_norm": 16.067312240600586, "learning_rate": 9.603960396039604e-07, "loss": 0.4799, "step": 296 }, { "epoch": 0.03973775755953974, "grad_norm": 14.774861335754395, "learning_rate": 9.6026224244046e-07, "loss": 0.5235, "step": 297 }, { "epoch": 0.039871554723039875, "grad_norm": 9.550383567810059, "learning_rate": 9.601284452769602e-07, "loss": 0.3979, "step": 298 }, { "epoch": 0.04000535188654, "grad_norm": 14.881258010864258, "learning_rate": 9.5999464811346e-07, "loss": 0.519, "step": 299 }, { "epoch": 0.04013914905004014, "grad_norm": 22.718822479248047, "learning_rate": 9.5986085094996e-07, "loss": 0.4216, "step": 300 }, { "epoch": 0.04027294621354027, "grad_norm": 7.808596134185791, "learning_rate": 9.597270537864596e-07, "loss": 0.5098, "step": 301 }, { "epoch": 0.04040674337704041, "grad_norm": 21.73202133178711, "learning_rate": 9.595932566229595e-07, "loss": 0.6343, "step": 302 }, { "epoch": 0.04054054054054054, "grad_norm": 26.896236419677734, "learning_rate": 9.594594594594594e-07, "loss": 0.5551, "step": 303 }, { "epoch": 0.04067433770404067, "grad_norm": 6.170568943023682, "learning_rate": 9.593256622959592e-07, "loss": 0.3798, "step": 304 }, { "epoch": 0.040808134867540806, "grad_norm": 16.270544052124023, "learning_rate": 9.591918651324591e-07, "loss": 0.5655, "step": 305 }, { "epoch": 0.04094193203104094, "grad_norm": 18.479751586914062, "learning_rate": 9.59058067968959e-07, "loss": 0.4016, "step": 306 }, { "epoch": 0.041075729194541076, "grad_norm": 20.77754020690918, "learning_rate": 9.589242708054589e-07, "loss": 0.5366, "step": 307 }, { "epoch": 0.04120952635804121, "grad_norm": 12.61411190032959, "learning_rate": 9.587904736419587e-07, "loss": 0.4588, "step": 308 }, { "epoch": 0.041343323521541346, "grad_norm": 11.54073715209961, "learning_rate": 9.586566764784586e-07, "loss": 0.4257, "step": 309 }, { "epoch": 0.041477120685041474, "grad_norm": 39.93162155151367, "learning_rate": 9.585228793149585e-07, "loss": 0.3455, "step": 310 }, { "epoch": 0.04161091784854161, "grad_norm": 20.17289924621582, "learning_rate": 9.583890821514584e-07, "loss": 0.4072, "step": 311 }, { "epoch": 0.041744715012041744, "grad_norm": 10.415538787841797, "learning_rate": 9.58255284987958e-07, "loss": 0.3914, "step": 312 }, { "epoch": 0.04187851217554188, "grad_norm": 21.509065628051758, "learning_rate": 9.581214878244581e-07, "loss": 0.4117, "step": 313 }, { "epoch": 0.042012309339042014, "grad_norm": 9.776922225952148, "learning_rate": 9.57987690660958e-07, "loss": 0.3777, "step": 314 }, { "epoch": 0.04214610650254215, "grad_norm": 18.86647605895996, "learning_rate": 9.57853893497458e-07, "loss": 0.6892, "step": 315 }, { "epoch": 0.04227990366604228, "grad_norm": 31.983579635620117, "learning_rate": 9.577200963339576e-07, "loss": 0.37, "step": 316 }, { "epoch": 0.04241370082954241, "grad_norm": 28.836711883544922, "learning_rate": 9.575862991704574e-07, "loss": 0.6283, "step": 317 }, { "epoch": 0.04254749799304255, "grad_norm": 38.69367218017578, "learning_rate": 9.574525020069573e-07, "loss": 0.6031, "step": 318 }, { "epoch": 0.04268129515654268, "grad_norm": 18.33104133605957, "learning_rate": 9.573187048434572e-07, "loss": 0.4934, "step": 319 }, { "epoch": 0.04281509232004282, "grad_norm": 18.744720458984375, "learning_rate": 9.57184907679957e-07, "loss": 0.5949, "step": 320 }, { "epoch": 0.042948889483542946, "grad_norm": 10.265746116638184, "learning_rate": 9.57051110516457e-07, "loss": 0.4912, "step": 321 }, { "epoch": 0.04308268664704308, "grad_norm": 27.541828155517578, "learning_rate": 9.569173133529568e-07, "loss": 0.363, "step": 322 }, { "epoch": 0.043216483810543216, "grad_norm": 14.46649169921875, "learning_rate": 9.567835161894567e-07, "loss": 0.4497, "step": 323 }, { "epoch": 0.04335028097404335, "grad_norm": 30.773880004882812, "learning_rate": 9.566497190259566e-07, "loss": 0.5391, "step": 324 }, { "epoch": 0.043484078137543486, "grad_norm": 46.86250686645508, "learning_rate": 9.565159218624565e-07, "loss": 0.3471, "step": 325 }, { "epoch": 0.04361787530104362, "grad_norm": 19.321178436279297, "learning_rate": 9.563821246989564e-07, "loss": 0.4495, "step": 326 }, { "epoch": 0.04375167246454375, "grad_norm": 10.53525447845459, "learning_rate": 9.562483275354562e-07, "loss": 0.3883, "step": 327 }, { "epoch": 0.043885469628043884, "grad_norm": 18.363567352294922, "learning_rate": 9.561145303719561e-07, "loss": 0.6246, "step": 328 }, { "epoch": 0.04401926679154402, "grad_norm": 16.05950164794922, "learning_rate": 9.55980733208456e-07, "loss": 0.353, "step": 329 }, { "epoch": 0.044153063955044154, "grad_norm": 8.734678268432617, "learning_rate": 9.558469360449559e-07, "loss": 0.3811, "step": 330 }, { "epoch": 0.04428686111854429, "grad_norm": 9.962549209594727, "learning_rate": 9.557131388814558e-07, "loss": 0.5225, "step": 331 }, { "epoch": 0.04442065828204442, "grad_norm": 35.65846633911133, "learning_rate": 9.555793417179554e-07, "loss": 0.6538, "step": 332 }, { "epoch": 0.04455445544554455, "grad_norm": 36.1397590637207, "learning_rate": 9.554455445544553e-07, "loss": 0.3637, "step": 333 }, { "epoch": 0.04468825260904469, "grad_norm": 16.884204864501953, "learning_rate": 9.553117473909552e-07, "loss": 0.7461, "step": 334 }, { "epoch": 0.04482204977254482, "grad_norm": 12.1273193359375, "learning_rate": 9.551779502274553e-07, "loss": 0.5437, "step": 335 }, { "epoch": 0.04495584693604496, "grad_norm": 31.238067626953125, "learning_rate": 9.55044153063955e-07, "loss": 0.6259, "step": 336 }, { "epoch": 0.04508964409954509, "grad_norm": 7.62296199798584, "learning_rate": 9.549103559004548e-07, "loss": 0.4153, "step": 337 }, { "epoch": 0.04522344126304522, "grad_norm": 10.458264350891113, "learning_rate": 9.547765587369547e-07, "loss": 0.3106, "step": 338 }, { "epoch": 0.045357238426545356, "grad_norm": 6.694291591644287, "learning_rate": 9.546427615734546e-07, "loss": 0.3941, "step": 339 }, { "epoch": 0.04549103559004549, "grad_norm": 14.521608352661133, "learning_rate": 9.545089644099545e-07, "loss": 0.3232, "step": 340 }, { "epoch": 0.045624832753545626, "grad_norm": 23.134746551513672, "learning_rate": 9.543751672464543e-07, "loss": 0.559, "step": 341 }, { "epoch": 0.04575862991704576, "grad_norm": 23.279645919799805, "learning_rate": 9.542413700829542e-07, "loss": 0.6368, "step": 342 }, { "epoch": 0.045892427080545896, "grad_norm": 28.379045486450195, "learning_rate": 9.54107572919454e-07, "loss": 0.6625, "step": 343 }, { "epoch": 0.046026224244046024, "grad_norm": 28.548179626464844, "learning_rate": 9.53973775755954e-07, "loss": 0.3063, "step": 344 }, { "epoch": 0.04616002140754616, "grad_norm": 10.461892127990723, "learning_rate": 9.538399785924539e-07, "loss": 0.5598, "step": 345 }, { "epoch": 0.046293818571046294, "grad_norm": 11.202370643615723, "learning_rate": 9.537061814289537e-07, "loss": 0.3984, "step": 346 }, { "epoch": 0.04642761573454643, "grad_norm": 11.296040534973145, "learning_rate": 9.535723842654535e-07, "loss": 0.5074, "step": 347 }, { "epoch": 0.046561412898046564, "grad_norm": 10.537249565124512, "learning_rate": 9.534385871019534e-07, "loss": 0.4794, "step": 348 }, { "epoch": 0.04669521006154669, "grad_norm": 26.24642562866211, "learning_rate": 9.533047899384533e-07, "loss": 0.4148, "step": 349 }, { "epoch": 0.04682900722504683, "grad_norm": 12.477163314819336, "learning_rate": 9.531709927749531e-07, "loss": 0.5029, "step": 350 }, { "epoch": 0.04696280438854696, "grad_norm": 20.183935165405273, "learning_rate": 9.53037195611453e-07, "loss": 0.413, "step": 351 }, { "epoch": 0.0470966015520471, "grad_norm": 9.955525398254395, "learning_rate": 9.529033984479529e-07, "loss": 0.3395, "step": 352 }, { "epoch": 0.04723039871554723, "grad_norm": 15.379146575927734, "learning_rate": 9.527696012844527e-07, "loss": 0.4466, "step": 353 }, { "epoch": 0.04736419587904737, "grad_norm": 29.041584014892578, "learning_rate": 9.526358041209526e-07, "loss": 0.5577, "step": 354 }, { "epoch": 0.047497993042547496, "grad_norm": 27.489521026611328, "learning_rate": 9.525020069574524e-07, "loss": 0.65, "step": 355 }, { "epoch": 0.04763179020604763, "grad_norm": 69.29265594482422, "learning_rate": 9.523682097939523e-07, "loss": 1.0212, "step": 356 }, { "epoch": 0.047765587369547766, "grad_norm": 29.046466827392578, "learning_rate": 9.522344126304522e-07, "loss": 0.4463, "step": 357 }, { "epoch": 0.0478993845330479, "grad_norm": 28.384323120117188, "learning_rate": 9.521006154669521e-07, "loss": 0.4954, "step": 358 }, { "epoch": 0.048033181696548036, "grad_norm": 12.317133903503418, "learning_rate": 9.51966818303452e-07, "loss": 0.3114, "step": 359 }, { "epoch": 0.048166978860048164, "grad_norm": 12.511984825134277, "learning_rate": 9.518330211399518e-07, "loss": 0.2026, "step": 360 }, { "epoch": 0.0483007760235483, "grad_norm": 11.313870429992676, "learning_rate": 9.516992239764516e-07, "loss": 0.3127, "step": 361 }, { "epoch": 0.048434573187048434, "grad_norm": 19.103620529174805, "learning_rate": 9.515654268129515e-07, "loss": 0.5126, "step": 362 }, { "epoch": 0.04856837035054857, "grad_norm": 20.43375015258789, "learning_rate": 9.514316296494514e-07, "loss": 0.4487, "step": 363 }, { "epoch": 0.048702167514048704, "grad_norm": 24.0658016204834, "learning_rate": 9.512978324859514e-07, "loss": 0.4885, "step": 364 }, { "epoch": 0.04883596467754884, "grad_norm": 14.990455627441406, "learning_rate": 9.511640353224511e-07, "loss": 0.4332, "step": 365 }, { "epoch": 0.04896976184104897, "grad_norm": 11.73886775970459, "learning_rate": 9.51030238158951e-07, "loss": 0.4612, "step": 366 }, { "epoch": 0.0491035590045491, "grad_norm": 22.96059799194336, "learning_rate": 9.508964409954509e-07, "loss": 0.4256, "step": 367 }, { "epoch": 0.04923735616804924, "grad_norm": 26.383779525756836, "learning_rate": 9.507626438319507e-07, "loss": 0.4087, "step": 368 }, { "epoch": 0.04937115333154937, "grad_norm": 23.529930114746094, "learning_rate": 9.506288466684505e-07, "loss": 0.7068, "step": 369 }, { "epoch": 0.04950495049504951, "grad_norm": 23.119876861572266, "learning_rate": 9.504950495049504e-07, "loss": 0.5122, "step": 370 }, { "epoch": 0.049638747658549635, "grad_norm": 19.13673210144043, "learning_rate": 9.503612523414504e-07, "loss": 0.6036, "step": 371 }, { "epoch": 0.04977254482204977, "grad_norm": 23.777297973632812, "learning_rate": 9.502274551779502e-07, "loss": 0.454, "step": 372 }, { "epoch": 0.049906341985549905, "grad_norm": 36.48681640625, "learning_rate": 9.5009365801445e-07, "loss": 0.4804, "step": 373 }, { "epoch": 0.05004013914905004, "grad_norm": 23.13248062133789, "learning_rate": 9.499598608509499e-07, "loss": 0.4112, "step": 374 }, { "epoch": 0.050173936312550176, "grad_norm": 11.668293952941895, "learning_rate": 9.498260636874498e-07, "loss": 0.3628, "step": 375 }, { "epoch": 0.05030773347605031, "grad_norm": 11.535722732543945, "learning_rate": 9.496922665239496e-07, "loss": 0.2802, "step": 376 }, { "epoch": 0.05044153063955044, "grad_norm": 14.43337345123291, "learning_rate": 9.495584693604495e-07, "loss": 0.301, "step": 377 }, { "epoch": 0.050575327803050574, "grad_norm": 14.861862182617188, "learning_rate": 9.494246721969493e-07, "loss": 0.3933, "step": 378 }, { "epoch": 0.05070912496655071, "grad_norm": 14.851324081420898, "learning_rate": 9.492908750334493e-07, "loss": 0.3379, "step": 379 }, { "epoch": 0.050842922130050844, "grad_norm": 40.331459045410156, "learning_rate": 9.491570778699491e-07, "loss": 0.6712, "step": 380 }, { "epoch": 0.05097671929355098, "grad_norm": 38.974708557128906, "learning_rate": 9.49023280706449e-07, "loss": 0.6351, "step": 381 }, { "epoch": 0.051110516457051114, "grad_norm": 46.86833190917969, "learning_rate": 9.488894835429489e-07, "loss": 0.6068, "step": 382 }, { "epoch": 0.05124431362055124, "grad_norm": 20.365400314331055, "learning_rate": 9.487556863794487e-07, "loss": 0.5312, "step": 383 }, { "epoch": 0.05137811078405138, "grad_norm": 25.038856506347656, "learning_rate": 9.486218892159485e-07, "loss": 0.3357, "step": 384 }, { "epoch": 0.05151190794755151, "grad_norm": 27.225908279418945, "learning_rate": 9.484880920524484e-07, "loss": 0.3946, "step": 385 }, { "epoch": 0.05164570511105165, "grad_norm": 11.397448539733887, "learning_rate": 9.483542948889484e-07, "loss": 0.3853, "step": 386 }, { "epoch": 0.05177950227455178, "grad_norm": 17.89453887939453, "learning_rate": 9.482204977254483e-07, "loss": 0.5318, "step": 387 }, { "epoch": 0.05191329943805191, "grad_norm": 29.323179244995117, "learning_rate": 9.48086700561948e-07, "loss": 0.3209, "step": 388 }, { "epoch": 0.052047096601552045, "grad_norm": 31.232158660888672, "learning_rate": 9.479529033984479e-07, "loss": 0.3534, "step": 389 }, { "epoch": 0.05218089376505218, "grad_norm": 9.886492729187012, "learning_rate": 9.478191062349478e-07, "loss": 0.4901, "step": 390 }, { "epoch": 0.052314690928552315, "grad_norm": 11.811864852905273, "learning_rate": 9.476853090714476e-07, "loss": 0.5325, "step": 391 }, { "epoch": 0.05244848809205245, "grad_norm": 47.475189208984375, "learning_rate": 9.475515119079474e-07, "loss": 0.3363, "step": 392 }, { "epoch": 0.052582285255552585, "grad_norm": 11.338348388671875, "learning_rate": 9.474177147444474e-07, "loss": 0.4073, "step": 393 }, { "epoch": 0.052716082419052714, "grad_norm": 20.128520965576172, "learning_rate": 9.472839175809473e-07, "loss": 0.6519, "step": 394 }, { "epoch": 0.05284987958255285, "grad_norm": 28.00393295288086, "learning_rate": 9.471501204174471e-07, "loss": 0.6315, "step": 395 }, { "epoch": 0.052983676746052984, "grad_norm": 11.389805793762207, "learning_rate": 9.47016323253947e-07, "loss": 0.4137, "step": 396 }, { "epoch": 0.05311747390955312, "grad_norm": 13.974609375, "learning_rate": 9.468825260904468e-07, "loss": 0.377, "step": 397 }, { "epoch": 0.053251271073053254, "grad_norm": 9.007412910461426, "learning_rate": 9.467487289269467e-07, "loss": 0.4268, "step": 398 }, { "epoch": 0.05338506823655338, "grad_norm": 9.406835556030273, "learning_rate": 9.466149317634465e-07, "loss": 0.3854, "step": 399 }, { "epoch": 0.05351886540005352, "grad_norm": 11.833761215209961, "learning_rate": 9.464811345999464e-07, "loss": 0.3906, "step": 400 }, { "epoch": 0.05365266256355365, "grad_norm": 20.43267059326172, "learning_rate": 9.463473374364464e-07, "loss": 0.6387, "step": 401 }, { "epoch": 0.05378645972705379, "grad_norm": 27.975900650024414, "learning_rate": 9.462135402729462e-07, "loss": 0.5483, "step": 402 }, { "epoch": 0.05392025689055392, "grad_norm": 13.52100658416748, "learning_rate": 9.46079743109446e-07, "loss": 0.3451, "step": 403 }, { "epoch": 0.05405405405405406, "grad_norm": 12.755200386047363, "learning_rate": 9.459459459459459e-07, "loss": 0.4084, "step": 404 }, { "epoch": 0.054187851217554185, "grad_norm": 32.377017974853516, "learning_rate": 9.458121487824458e-07, "loss": 0.764, "step": 405 }, { "epoch": 0.05432164838105432, "grad_norm": 19.081512451171875, "learning_rate": 9.456783516189456e-07, "loss": 0.4899, "step": 406 }, { "epoch": 0.054455445544554455, "grad_norm": 33.504058837890625, "learning_rate": 9.455445544554454e-07, "loss": 0.4767, "step": 407 }, { "epoch": 0.05458924270805459, "grad_norm": 16.261573791503906, "learning_rate": 9.454107572919454e-07, "loss": 0.5096, "step": 408 }, { "epoch": 0.054723039871554725, "grad_norm": 52.486690521240234, "learning_rate": 9.452769601284453e-07, "loss": 0.2791, "step": 409 }, { "epoch": 0.05485683703505485, "grad_norm": 34.45723342895508, "learning_rate": 9.451431629649452e-07, "loss": 0.8269, "step": 410 }, { "epoch": 0.05499063419855499, "grad_norm": 30.467134475708008, "learning_rate": 9.450093658014449e-07, "loss": 0.5919, "step": 411 }, { "epoch": 0.055124431362055124, "grad_norm": 29.93265724182129, "learning_rate": 9.448755686379448e-07, "loss": 0.5651, "step": 412 }, { "epoch": 0.05525822852555526, "grad_norm": 22.17186164855957, "learning_rate": 9.447417714744447e-07, "loss": 0.4493, "step": 413 }, { "epoch": 0.055392025689055394, "grad_norm": 18.992136001586914, "learning_rate": 9.446079743109446e-07, "loss": 0.5053, "step": 414 }, { "epoch": 0.05552582285255553, "grad_norm": 37.11378479003906, "learning_rate": 9.444741771474445e-07, "loss": 0.4179, "step": 415 }, { "epoch": 0.05565962001605566, "grad_norm": 8.95394515991211, "learning_rate": 9.443403799839443e-07, "loss": 0.4103, "step": 416 }, { "epoch": 0.05579341717955579, "grad_norm": 12.03274154663086, "learning_rate": 9.442065828204442e-07, "loss": 0.4112, "step": 417 }, { "epoch": 0.05592721434305593, "grad_norm": 39.73799133300781, "learning_rate": 9.440727856569441e-07, "loss": 0.622, "step": 418 }, { "epoch": 0.05606101150655606, "grad_norm": 29.603208541870117, "learning_rate": 9.439389884934439e-07, "loss": 0.5491, "step": 419 }, { "epoch": 0.0561948086700562, "grad_norm": 35.343116760253906, "learning_rate": 9.438051913299437e-07, "loss": 0.5777, "step": 420 }, { "epoch": 0.05632860583355633, "grad_norm": 30.707962036132812, "learning_rate": 9.436713941664436e-07, "loss": 0.6635, "step": 421 }, { "epoch": 0.05646240299705646, "grad_norm": 27.233768463134766, "learning_rate": 9.435375970029434e-07, "loss": 0.6565, "step": 422 }, { "epoch": 0.056596200160556595, "grad_norm": 18.183406829833984, "learning_rate": 9.434037998394434e-07, "loss": 0.5344, "step": 423 }, { "epoch": 0.05672999732405673, "grad_norm": 25.41071319580078, "learning_rate": 9.432700026759433e-07, "loss": 0.2269, "step": 424 }, { "epoch": 0.056863794487556865, "grad_norm": 8.937655448913574, "learning_rate": 9.431362055124431e-07, "loss": 0.5424, "step": 425 }, { "epoch": 0.056997591651057, "grad_norm": 18.178482055664062, "learning_rate": 9.430024083489429e-07, "loss": 0.5464, "step": 426 }, { "epoch": 0.05713138881455713, "grad_norm": 15.047130584716797, "learning_rate": 9.428686111854428e-07, "loss": 0.4629, "step": 427 }, { "epoch": 0.05726518597805726, "grad_norm": 10.116939544677734, "learning_rate": 9.427348140219427e-07, "loss": 0.5825, "step": 428 }, { "epoch": 0.0573989831415574, "grad_norm": 24.839475631713867, "learning_rate": 9.426010168584426e-07, "loss": 0.5104, "step": 429 }, { "epoch": 0.05753278030505753, "grad_norm": 10.998244285583496, "learning_rate": 9.424672196949424e-07, "loss": 0.3131, "step": 430 }, { "epoch": 0.05766657746855767, "grad_norm": 39.46440505981445, "learning_rate": 9.423334225314423e-07, "loss": 0.6558, "step": 431 }, { "epoch": 0.057800374632057804, "grad_norm": 12.731789588928223, "learning_rate": 9.421996253679422e-07, "loss": 0.369, "step": 432 }, { "epoch": 0.05793417179555793, "grad_norm": 21.854480743408203, "learning_rate": 9.420658282044421e-07, "loss": 0.3717, "step": 433 }, { "epoch": 0.05806796895905807, "grad_norm": 9.045459747314453, "learning_rate": 9.419320310409418e-07, "loss": 0.5693, "step": 434 }, { "epoch": 0.0582017661225582, "grad_norm": 19.638029098510742, "learning_rate": 9.417982338774417e-07, "loss": 0.4663, "step": 435 }, { "epoch": 0.05833556328605834, "grad_norm": 15.247964859008789, "learning_rate": 9.416644367139416e-07, "loss": 0.5122, "step": 436 }, { "epoch": 0.05846936044955847, "grad_norm": 14.863848686218262, "learning_rate": 9.415306395504416e-07, "loss": 0.4558, "step": 437 }, { "epoch": 0.0586031576130586, "grad_norm": 45.182552337646484, "learning_rate": 9.413968423869414e-07, "loss": 0.8523, "step": 438 }, { "epoch": 0.058736954776558735, "grad_norm": 7.975986957550049, "learning_rate": 9.412630452234412e-07, "loss": 0.3929, "step": 439 }, { "epoch": 0.05887075194005887, "grad_norm": 35.42660140991211, "learning_rate": 9.411292480599411e-07, "loss": 0.3958, "step": 440 }, { "epoch": 0.059004549103559005, "grad_norm": 40.076210021972656, "learning_rate": 9.40995450896441e-07, "loss": 0.4833, "step": 441 }, { "epoch": 0.05913834626705914, "grad_norm": 11.552017211914062, "learning_rate": 9.408616537329408e-07, "loss": 0.272, "step": 442 }, { "epoch": 0.059272143430559275, "grad_norm": 23.314577102661133, "learning_rate": 9.407278565694406e-07, "loss": 0.4568, "step": 443 }, { "epoch": 0.0594059405940594, "grad_norm": 19.732851028442383, "learning_rate": 9.405940594059405e-07, "loss": 0.486, "step": 444 }, { "epoch": 0.05953973775755954, "grad_norm": 15.801751136779785, "learning_rate": 9.404602622424405e-07, "loss": 0.3602, "step": 445 }, { "epoch": 0.05967353492105967, "grad_norm": 15.728655815124512, "learning_rate": 9.403264650789403e-07, "loss": 0.3716, "step": 446 }, { "epoch": 0.05980733208455981, "grad_norm": 16.705907821655273, "learning_rate": 9.401926679154402e-07, "loss": 0.528, "step": 447 }, { "epoch": 0.05994112924805994, "grad_norm": 11.061691284179688, "learning_rate": 9.4005887075194e-07, "loss": 0.4306, "step": 448 }, { "epoch": 0.06007492641156007, "grad_norm": 23.27248191833496, "learning_rate": 9.399250735884398e-07, "loss": 0.6406, "step": 449 }, { "epoch": 0.060208723575060207, "grad_norm": 50.76069259643555, "learning_rate": 9.397912764249397e-07, "loss": 0.7035, "step": 450 }, { "epoch": 0.06034252073856034, "grad_norm": 10.46657657623291, "learning_rate": 9.396574792614396e-07, "loss": 0.329, "step": 451 }, { "epoch": 0.06047631790206048, "grad_norm": 38.9277458190918, "learning_rate": 9.395236820979396e-07, "loss": 0.5615, "step": 452 }, { "epoch": 0.06061011506556061, "grad_norm": 21.497838973999023, "learning_rate": 9.393898849344393e-07, "loss": 0.4445, "step": 453 }, { "epoch": 0.06074391222906075, "grad_norm": 22.607465744018555, "learning_rate": 9.392560877709392e-07, "loss": 0.3294, "step": 454 }, { "epoch": 0.060877709392560875, "grad_norm": 16.306434631347656, "learning_rate": 9.391222906074391e-07, "loss": 0.262, "step": 455 }, { "epoch": 0.06101150655606101, "grad_norm": 28.63240623474121, "learning_rate": 9.38988493443939e-07, "loss": 0.5248, "step": 456 }, { "epoch": 0.061145303719561145, "grad_norm": 32.38787078857422, "learning_rate": 9.388546962804387e-07, "loss": 0.294, "step": 457 }, { "epoch": 0.06127910088306128, "grad_norm": 25.005624771118164, "learning_rate": 9.387208991169386e-07, "loss": 0.342, "step": 458 }, { "epoch": 0.061412898046561415, "grad_norm": 22.250158309936523, "learning_rate": 9.385871019534386e-07, "loss": 0.565, "step": 459 }, { "epoch": 0.06154669521006155, "grad_norm": 10.872614860534668, "learning_rate": 9.384533047899385e-07, "loss": 0.3975, "step": 460 }, { "epoch": 0.06168049237356168, "grad_norm": 19.91939926147461, "learning_rate": 9.383195076264383e-07, "loss": 0.285, "step": 461 }, { "epoch": 0.06181428953706181, "grad_norm": 12.522256851196289, "learning_rate": 9.381857104629381e-07, "loss": 0.2267, "step": 462 }, { "epoch": 0.06194808670056195, "grad_norm": 25.6498966217041, "learning_rate": 9.38051913299438e-07, "loss": 0.3691, "step": 463 }, { "epoch": 0.06208188386406208, "grad_norm": 23.04656219482422, "learning_rate": 9.379181161359379e-07, "loss": 0.3045, "step": 464 }, { "epoch": 0.06221568102756222, "grad_norm": 18.728397369384766, "learning_rate": 9.377843189724377e-07, "loss": 0.2476, "step": 465 }, { "epoch": 0.062349478191062346, "grad_norm": 14.424503326416016, "learning_rate": 9.376505218089376e-07, "loss": 0.4591, "step": 466 }, { "epoch": 0.06248327535456248, "grad_norm": 65.9581069946289, "learning_rate": 9.375167246454375e-07, "loss": 0.8436, "step": 467 }, { "epoch": 0.06261707251806262, "grad_norm": 9.581587791442871, "learning_rate": 9.373829274819374e-07, "loss": 0.3638, "step": 468 }, { "epoch": 0.06275086968156275, "grad_norm": 37.46302032470703, "learning_rate": 9.372491303184372e-07, "loss": 0.7028, "step": 469 }, { "epoch": 0.06288466684506289, "grad_norm": 36.30107879638672, "learning_rate": 9.371153331549371e-07, "loss": 0.842, "step": 470 }, { "epoch": 0.06301846400856302, "grad_norm": 21.38796043395996, "learning_rate": 9.36981535991437e-07, "loss": 0.2544, "step": 471 }, { "epoch": 0.06315226117206316, "grad_norm": 18.06505012512207, "learning_rate": 9.368477388279367e-07, "loss": 0.4107, "step": 472 }, { "epoch": 0.06328605833556329, "grad_norm": 22.72092056274414, "learning_rate": 9.367139416644366e-07, "loss": 0.4072, "step": 473 }, { "epoch": 0.06341985549906343, "grad_norm": 14.594680786132812, "learning_rate": 9.365801445009366e-07, "loss": 0.5001, "step": 474 }, { "epoch": 0.06355365266256355, "grad_norm": 15.963287353515625, "learning_rate": 9.364463473374365e-07, "loss": 0.5567, "step": 475 }, { "epoch": 0.06368744982606368, "grad_norm": 18.204336166381836, "learning_rate": 9.363125501739362e-07, "loss": 0.5929, "step": 476 }, { "epoch": 0.06382124698956382, "grad_norm": 37.33431625366211, "learning_rate": 9.361787530104361e-07, "loss": 0.3392, "step": 477 }, { "epoch": 0.06395504415306395, "grad_norm": 21.086002349853516, "learning_rate": 9.36044955846936e-07, "loss": 0.4538, "step": 478 }, { "epoch": 0.06408884131656409, "grad_norm": 19.572265625, "learning_rate": 9.359111586834359e-07, "loss": 0.4583, "step": 479 }, { "epoch": 0.06422263848006422, "grad_norm": 9.397624969482422, "learning_rate": 9.357773615199357e-07, "loss": 0.504, "step": 480 }, { "epoch": 0.06435643564356436, "grad_norm": 10.441423416137695, "learning_rate": 9.356435643564356e-07, "loss": 0.4289, "step": 481 }, { "epoch": 0.0644902328070645, "grad_norm": 12.805157661437988, "learning_rate": 9.355097671929355e-07, "loss": 0.4918, "step": 482 }, { "epoch": 0.06462402997056463, "grad_norm": 13.465669631958008, "learning_rate": 9.353759700294354e-07, "loss": 0.3764, "step": 483 }, { "epoch": 0.06475782713406476, "grad_norm": 44.650665283203125, "learning_rate": 9.352421728659352e-07, "loss": 0.6466, "step": 484 }, { "epoch": 0.0648916242975649, "grad_norm": 34.85614776611328, "learning_rate": 9.35108375702435e-07, "loss": 0.4776, "step": 485 }, { "epoch": 0.06502542146106502, "grad_norm": 50.9043083190918, "learning_rate": 9.349745785389349e-07, "loss": 0.6696, "step": 486 }, { "epoch": 0.06515921862456515, "grad_norm": 33.14093017578125, "learning_rate": 9.348407813754348e-07, "loss": 0.2203, "step": 487 }, { "epoch": 0.06529301578806529, "grad_norm": 35.72391891479492, "learning_rate": 9.347069842119346e-07, "loss": 0.7232, "step": 488 }, { "epoch": 0.06542681295156542, "grad_norm": 24.071571350097656, "learning_rate": 9.345731870484346e-07, "loss": 0.6756, "step": 489 }, { "epoch": 0.06556061011506556, "grad_norm": 18.50311279296875, "learning_rate": 9.344393898849344e-07, "loss": 0.5292, "step": 490 }, { "epoch": 0.0656944072785657, "grad_norm": 25.16903305053711, "learning_rate": 9.343055927214343e-07, "loss": 0.417, "step": 491 }, { "epoch": 0.06582820444206583, "grad_norm": 21.560083389282227, "learning_rate": 9.341717955579341e-07, "loss": 0.4849, "step": 492 }, { "epoch": 0.06596200160556596, "grad_norm": 18.530662536621094, "learning_rate": 9.34037998394434e-07, "loss": 0.565, "step": 493 }, { "epoch": 0.0660957987690661, "grad_norm": 13.564650535583496, "learning_rate": 9.339042012309339e-07, "loss": 0.4875, "step": 494 }, { "epoch": 0.06622959593256623, "grad_norm": 27.3027286529541, "learning_rate": 9.337704040674336e-07, "loss": 0.511, "step": 495 }, { "epoch": 0.06636339309606637, "grad_norm": 58.96568298339844, "learning_rate": 9.336366069039336e-07, "loss": 0.4851, "step": 496 }, { "epoch": 0.06649719025956649, "grad_norm": 31.803241729736328, "learning_rate": 9.335028097404335e-07, "loss": 0.5441, "step": 497 }, { "epoch": 0.06663098742306663, "grad_norm": 10.672648429870605, "learning_rate": 9.333690125769334e-07, "loss": 0.5978, "step": 498 }, { "epoch": 0.06676478458656676, "grad_norm": 10.119758605957031, "learning_rate": 9.332352154134331e-07, "loss": 0.4498, "step": 499 }, { "epoch": 0.0668985817500669, "grad_norm": 12.749112129211426, "learning_rate": 9.33101418249933e-07, "loss": 0.4446, "step": 500 }, { "epoch": 0.06703237891356703, "grad_norm": 11.803135871887207, "learning_rate": 9.329676210864329e-07, "loss": 0.6344, "step": 501 }, { "epoch": 0.06716617607706717, "grad_norm": 29.999422073364258, "learning_rate": 9.328338239229328e-07, "loss": 0.4708, "step": 502 }, { "epoch": 0.0672999732405673, "grad_norm": 13.939997673034668, "learning_rate": 9.327000267594327e-07, "loss": 0.452, "step": 503 }, { "epoch": 0.06743377040406744, "grad_norm": 11.15085506439209, "learning_rate": 9.325662295959325e-07, "loss": 0.4715, "step": 504 }, { "epoch": 0.06756756756756757, "grad_norm": 15.762297630310059, "learning_rate": 9.324324324324324e-07, "loss": 0.5799, "step": 505 }, { "epoch": 0.0677013647310677, "grad_norm": 7.999119758605957, "learning_rate": 9.322986352689323e-07, "loss": 0.3886, "step": 506 }, { "epoch": 0.06783516189456784, "grad_norm": 10.307694435119629, "learning_rate": 9.321648381054321e-07, "loss": 0.4442, "step": 507 }, { "epoch": 0.06796895905806796, "grad_norm": 9.153679847717285, "learning_rate": 9.32031040941932e-07, "loss": 0.2785, "step": 508 }, { "epoch": 0.0681027562215681, "grad_norm": 14.056694984436035, "learning_rate": 9.318972437784318e-07, "loss": 0.3619, "step": 509 }, { "epoch": 0.06823655338506823, "grad_norm": 7.571393013000488, "learning_rate": 9.317634466149317e-07, "loss": 0.3028, "step": 510 }, { "epoch": 0.06837035054856837, "grad_norm": 31.3958683013916, "learning_rate": 9.316296494514316e-07, "loss": 0.5813, "step": 511 }, { "epoch": 0.0685041477120685, "grad_norm": 25.85735511779785, "learning_rate": 9.314958522879315e-07, "loss": 0.7481, "step": 512 }, { "epoch": 0.06863794487556864, "grad_norm": 18.95513343811035, "learning_rate": 9.313620551244313e-07, "loss": 0.4306, "step": 513 }, { "epoch": 0.06877174203906877, "grad_norm": 25.907108306884766, "learning_rate": 9.312282579609312e-07, "loss": 0.4038, "step": 514 }, { "epoch": 0.06890553920256891, "grad_norm": 18.160091400146484, "learning_rate": 9.31094460797431e-07, "loss": 0.4933, "step": 515 }, { "epoch": 0.06903933636606904, "grad_norm": 23.835655212402344, "learning_rate": 9.309606636339309e-07, "loss": 0.4183, "step": 516 }, { "epoch": 0.06917313352956918, "grad_norm": 17.362396240234375, "learning_rate": 9.308268664704308e-07, "loss": 0.4979, "step": 517 }, { "epoch": 0.06930693069306931, "grad_norm": 17.188486099243164, "learning_rate": 9.306930693069307e-07, "loss": 0.3863, "step": 518 }, { "epoch": 0.06944072785656943, "grad_norm": 21.88876724243164, "learning_rate": 9.305592721434305e-07, "loss": 0.5404, "step": 519 }, { "epoch": 0.06957452502006957, "grad_norm": 11.673177719116211, "learning_rate": 9.304254749799304e-07, "loss": 0.4644, "step": 520 }, { "epoch": 0.0697083221835697, "grad_norm": 20.705411911010742, "learning_rate": 9.302916778164303e-07, "loss": 0.4308, "step": 521 }, { "epoch": 0.06984211934706984, "grad_norm": 11.938777923583984, "learning_rate": 9.301578806529302e-07, "loss": 0.4317, "step": 522 }, { "epoch": 0.06997591651056997, "grad_norm": 31.949655532836914, "learning_rate": 9.300240834894299e-07, "loss": 0.3685, "step": 523 }, { "epoch": 0.07010971367407011, "grad_norm": 19.437265396118164, "learning_rate": 9.298902863259298e-07, "loss": 0.5452, "step": 524 }, { "epoch": 0.07024351083757024, "grad_norm": 14.550933837890625, "learning_rate": 9.297564891624298e-07, "loss": 0.5657, "step": 525 }, { "epoch": 0.07037730800107038, "grad_norm": 36.25200271606445, "learning_rate": 9.296226919989296e-07, "loss": 0.6068, "step": 526 }, { "epoch": 0.07051110516457051, "grad_norm": 14.58354377746582, "learning_rate": 9.294888948354294e-07, "loss": 0.4463, "step": 527 }, { "epoch": 0.07064490232807065, "grad_norm": 14.38774585723877, "learning_rate": 9.293550976719293e-07, "loss": 0.4803, "step": 528 }, { "epoch": 0.07077869949157078, "grad_norm": 31.745967864990234, "learning_rate": 9.292213005084292e-07, "loss": 0.5719, "step": 529 }, { "epoch": 0.0709124966550709, "grad_norm": 21.0757999420166, "learning_rate": 9.29087503344929e-07, "loss": 0.4762, "step": 530 }, { "epoch": 0.07104629381857104, "grad_norm": 16.952001571655273, "learning_rate": 9.289537061814289e-07, "loss": 0.6051, "step": 531 }, { "epoch": 0.07118009098207118, "grad_norm": 16.711505889892578, "learning_rate": 9.288199090179287e-07, "loss": 0.4424, "step": 532 }, { "epoch": 0.07131388814557131, "grad_norm": 11.563283920288086, "learning_rate": 9.286861118544287e-07, "loss": 0.4692, "step": 533 }, { "epoch": 0.07144768530907145, "grad_norm": 12.051767349243164, "learning_rate": 9.285523146909285e-07, "loss": 0.4168, "step": 534 }, { "epoch": 0.07158148247257158, "grad_norm": 15.214631080627441, "learning_rate": 9.284185175274284e-07, "loss": 0.4342, "step": 535 }, { "epoch": 0.07171527963607172, "grad_norm": 10.9327974319458, "learning_rate": 9.282847203639283e-07, "loss": 0.5325, "step": 536 }, { "epoch": 0.07184907679957185, "grad_norm": 9.062777519226074, "learning_rate": 9.281509232004281e-07, "loss": 0.4544, "step": 537 }, { "epoch": 0.07198287396307199, "grad_norm": 28.46946907043457, "learning_rate": 9.280171260369279e-07, "loss": 0.5272, "step": 538 }, { "epoch": 0.07211667112657212, "grad_norm": 8.972566604614258, "learning_rate": 9.278833288734278e-07, "loss": 0.3474, "step": 539 }, { "epoch": 0.07225046829007226, "grad_norm": 13.105140686035156, "learning_rate": 9.277495317099278e-07, "loss": 0.3908, "step": 540 }, { "epoch": 0.07238426545357239, "grad_norm": 23.280075073242188, "learning_rate": 9.276157345464277e-07, "loss": 0.6929, "step": 541 }, { "epoch": 0.07251806261707251, "grad_norm": 69.87619018554688, "learning_rate": 9.274819373829274e-07, "loss": 0.8289, "step": 542 }, { "epoch": 0.07265185978057265, "grad_norm": 21.56049346923828, "learning_rate": 9.273481402194273e-07, "loss": 0.4697, "step": 543 }, { "epoch": 0.07278565694407278, "grad_norm": 25.35953140258789, "learning_rate": 9.272143430559272e-07, "loss": 0.4203, "step": 544 }, { "epoch": 0.07291945410757292, "grad_norm": 23.212244033813477, "learning_rate": 9.270805458924271e-07, "loss": 0.5256, "step": 545 }, { "epoch": 0.07305325127107305, "grad_norm": 44.470367431640625, "learning_rate": 9.269467487289268e-07, "loss": 0.6367, "step": 546 }, { "epoch": 0.07318704843457319, "grad_norm": 26.339529037475586, "learning_rate": 9.268129515654268e-07, "loss": 0.6404, "step": 547 }, { "epoch": 0.07332084559807332, "grad_norm": 19.87677764892578, "learning_rate": 9.266791544019267e-07, "loss": 0.2384, "step": 548 }, { "epoch": 0.07345464276157346, "grad_norm": 11.523929595947266, "learning_rate": 9.265453572384266e-07, "loss": 0.4353, "step": 549 }, { "epoch": 0.07358843992507359, "grad_norm": 49.87577438354492, "learning_rate": 9.264115600749264e-07, "loss": 0.5905, "step": 550 }, { "epoch": 0.07372223708857373, "grad_norm": 42.66954803466797, "learning_rate": 9.262777629114262e-07, "loss": 0.4928, "step": 551 }, { "epoch": 0.07385603425207386, "grad_norm": 49.96345520019531, "learning_rate": 9.261439657479261e-07, "loss": 0.6193, "step": 552 }, { "epoch": 0.07398983141557398, "grad_norm": 15.841452598571777, "learning_rate": 9.260101685844259e-07, "loss": 0.5538, "step": 553 }, { "epoch": 0.07412362857907412, "grad_norm": 18.574594497680664, "learning_rate": 9.258763714209258e-07, "loss": 0.544, "step": 554 }, { "epoch": 0.07425742574257425, "grad_norm": 17.07056999206543, "learning_rate": 9.257425742574257e-07, "loss": 0.4779, "step": 555 }, { "epoch": 0.07439122290607439, "grad_norm": 19.932714462280273, "learning_rate": 9.256087770939256e-07, "loss": 0.4563, "step": 556 }, { "epoch": 0.07452502006957452, "grad_norm": 14.260254859924316, "learning_rate": 9.254749799304254e-07, "loss": 0.3764, "step": 557 }, { "epoch": 0.07465881723307466, "grad_norm": 13.108748435974121, "learning_rate": 9.253411827669253e-07, "loss": 0.5246, "step": 558 }, { "epoch": 0.0747926143965748, "grad_norm": 17.844297409057617, "learning_rate": 9.252073856034252e-07, "loss": 0.5921, "step": 559 }, { "epoch": 0.07492641156007493, "grad_norm": 24.419883728027344, "learning_rate": 9.25073588439925e-07, "loss": 0.6526, "step": 560 }, { "epoch": 0.07506020872357506, "grad_norm": 18.31925392150879, "learning_rate": 9.249397912764248e-07, "loss": 0.4269, "step": 561 }, { "epoch": 0.0751940058870752, "grad_norm": 20.77707862854004, "learning_rate": 9.248059941129248e-07, "loss": 0.4545, "step": 562 }, { "epoch": 0.07532780305057533, "grad_norm": 23.63409423828125, "learning_rate": 9.246721969494247e-07, "loss": 0.5101, "step": 563 }, { "epoch": 0.07546160021407546, "grad_norm": 24.261381149291992, "learning_rate": 9.245383997859246e-07, "loss": 0.4409, "step": 564 }, { "epoch": 0.07559539737757559, "grad_norm": 14.718764305114746, "learning_rate": 9.244046026224243e-07, "loss": 0.3989, "step": 565 }, { "epoch": 0.07572919454107573, "grad_norm": 9.397015571594238, "learning_rate": 9.242708054589242e-07, "loss": 0.492, "step": 566 }, { "epoch": 0.07586299170457586, "grad_norm": 14.579009056091309, "learning_rate": 9.241370082954241e-07, "loss": 0.4262, "step": 567 }, { "epoch": 0.075996788868076, "grad_norm": 17.027502059936523, "learning_rate": 9.24003211131924e-07, "loss": 0.6266, "step": 568 }, { "epoch": 0.07613058603157613, "grad_norm": 11.247966766357422, "learning_rate": 9.238694139684238e-07, "loss": 0.5622, "step": 569 }, { "epoch": 0.07626438319507627, "grad_norm": 13.295960426330566, "learning_rate": 9.237356168049237e-07, "loss": 0.5386, "step": 570 }, { "epoch": 0.0763981803585764, "grad_norm": 15.574817657470703, "learning_rate": 9.236018196414236e-07, "loss": 0.3717, "step": 571 }, { "epoch": 0.07653197752207654, "grad_norm": 9.596639633178711, "learning_rate": 9.234680224779235e-07, "loss": 0.5338, "step": 572 }, { "epoch": 0.07666577468557667, "grad_norm": 6.845826625823975, "learning_rate": 9.233342253144233e-07, "loss": 0.4759, "step": 573 }, { "epoch": 0.0767995718490768, "grad_norm": 11.784676551818848, "learning_rate": 9.232004281509231e-07, "loss": 0.3969, "step": 574 }, { "epoch": 0.07693336901257693, "grad_norm": 8.611150741577148, "learning_rate": 9.23066630987423e-07, "loss": 0.4442, "step": 575 }, { "epoch": 0.07706716617607706, "grad_norm": 21.85670280456543, "learning_rate": 9.229328338239228e-07, "loss": 0.2524, "step": 576 }, { "epoch": 0.0772009633395772, "grad_norm": 8.538519859313965, "learning_rate": 9.227990366604228e-07, "loss": 0.4709, "step": 577 }, { "epoch": 0.07733476050307733, "grad_norm": 13.46729850769043, "learning_rate": 9.226652394969227e-07, "loss": 0.459, "step": 578 }, { "epoch": 0.07746855766657747, "grad_norm": 12.098366737365723, "learning_rate": 9.225314423334225e-07, "loss": 0.4275, "step": 579 }, { "epoch": 0.0776023548300776, "grad_norm": 10.241510391235352, "learning_rate": 9.223976451699223e-07, "loss": 0.4848, "step": 580 }, { "epoch": 0.07773615199357774, "grad_norm": 38.732723236083984, "learning_rate": 9.222638480064222e-07, "loss": 0.6705, "step": 581 }, { "epoch": 0.07786994915707787, "grad_norm": 39.35795593261719, "learning_rate": 9.221300508429221e-07, "loss": 0.4929, "step": 582 }, { "epoch": 0.07800374632057801, "grad_norm": 21.121234893798828, "learning_rate": 9.219962536794219e-07, "loss": 0.3705, "step": 583 }, { "epoch": 0.07813754348407814, "grad_norm": 40.80171585083008, "learning_rate": 9.218624565159218e-07, "loss": 0.6832, "step": 584 }, { "epoch": 0.07827134064757828, "grad_norm": 8.464868545532227, "learning_rate": 9.217286593524217e-07, "loss": 0.4994, "step": 585 }, { "epoch": 0.0784051378110784, "grad_norm": 18.920835494995117, "learning_rate": 9.215948621889216e-07, "loss": 0.6793, "step": 586 }, { "epoch": 0.07853893497457853, "grad_norm": 22.57361602783203, "learning_rate": 9.214610650254215e-07, "loss": 0.5003, "step": 587 }, { "epoch": 0.07867273213807867, "grad_norm": 20.057310104370117, "learning_rate": 9.213272678619212e-07, "loss": 0.5722, "step": 588 }, { "epoch": 0.0788065293015788, "grad_norm": 8.068108558654785, "learning_rate": 9.211934706984211e-07, "loss": 0.5655, "step": 589 }, { "epoch": 0.07894032646507894, "grad_norm": 12.574069023132324, "learning_rate": 9.21059673534921e-07, "loss": 0.646, "step": 590 }, { "epoch": 0.07907412362857907, "grad_norm": 36.273101806640625, "learning_rate": 9.209258763714209e-07, "loss": 0.5101, "step": 591 }, { "epoch": 0.07920792079207921, "grad_norm": 32.11967086791992, "learning_rate": 9.207920792079208e-07, "loss": 0.5115, "step": 592 }, { "epoch": 0.07934171795557934, "grad_norm": 53.73308181762695, "learning_rate": 9.206582820444206e-07, "loss": 0.556, "step": 593 }, { "epoch": 0.07947551511907948, "grad_norm": 9.884782791137695, "learning_rate": 9.205244848809205e-07, "loss": 0.4582, "step": 594 }, { "epoch": 0.07960931228257961, "grad_norm": 55.438961029052734, "learning_rate": 9.203906877174204e-07, "loss": 0.6096, "step": 595 }, { "epoch": 0.07974310944607975, "grad_norm": 26.918357849121094, "learning_rate": 9.202568905539202e-07, "loss": 0.4944, "step": 596 }, { "epoch": 0.07987690660957987, "grad_norm": 17.561967849731445, "learning_rate": 9.2012309339042e-07, "loss": 0.3757, "step": 597 }, { "epoch": 0.08001070377308, "grad_norm": 9.72816276550293, "learning_rate": 9.199892962269199e-07, "loss": 0.5263, "step": 598 }, { "epoch": 0.08014450093658014, "grad_norm": 10.074684143066406, "learning_rate": 9.198554990634199e-07, "loss": 0.464, "step": 599 }, { "epoch": 0.08027829810008028, "grad_norm": 19.633316040039062, "learning_rate": 9.197217018999197e-07, "loss": 0.6933, "step": 600 }, { "epoch": 0.08041209526358041, "grad_norm": 17.142288208007812, "learning_rate": 9.195879047364196e-07, "loss": 0.4664, "step": 601 }, { "epoch": 0.08054589242708055, "grad_norm": 12.353638648986816, "learning_rate": 9.194541075729194e-07, "loss": 0.3763, "step": 602 }, { "epoch": 0.08067968959058068, "grad_norm": 53.94464111328125, "learning_rate": 9.193203104094192e-07, "loss": 0.655, "step": 603 }, { "epoch": 0.08081348675408082, "grad_norm": 23.9261531829834, "learning_rate": 9.191865132459191e-07, "loss": 0.4447, "step": 604 }, { "epoch": 0.08094728391758095, "grad_norm": 10.029491424560547, "learning_rate": 9.19052716082419e-07, "loss": 0.3946, "step": 605 }, { "epoch": 0.08108108108108109, "grad_norm": 10.02285385131836, "learning_rate": 9.18918918918919e-07, "loss": 0.6298, "step": 606 }, { "epoch": 0.08121487824458122, "grad_norm": 7.424933433532715, "learning_rate": 9.187851217554187e-07, "loss": 0.3473, "step": 607 }, { "epoch": 0.08134867540808134, "grad_norm": 12.987457275390625, "learning_rate": 9.186513245919186e-07, "loss": 0.4804, "step": 608 }, { "epoch": 0.08148247257158148, "grad_norm": 12.815722465515137, "learning_rate": 9.185175274284185e-07, "loss": 0.5169, "step": 609 }, { "epoch": 0.08161626973508161, "grad_norm": 36.044189453125, "learning_rate": 9.183837302649184e-07, "loss": 0.6965, "step": 610 }, { "epoch": 0.08175006689858175, "grad_norm": 23.352941513061523, "learning_rate": 9.182499331014181e-07, "loss": 0.3369, "step": 611 }, { "epoch": 0.08188386406208188, "grad_norm": 19.51813316345215, "learning_rate": 9.18116135937918e-07, "loss": 0.2983, "step": 612 }, { "epoch": 0.08201766122558202, "grad_norm": 31.621871948242188, "learning_rate": 9.179823387744179e-07, "loss": 0.3982, "step": 613 }, { "epoch": 0.08215145838908215, "grad_norm": 17.144229888916016, "learning_rate": 9.178485416109179e-07, "loss": 0.4073, "step": 614 }, { "epoch": 0.08228525555258229, "grad_norm": 11.697664260864258, "learning_rate": 9.177147444474177e-07, "loss": 0.4362, "step": 615 }, { "epoch": 0.08241905271608242, "grad_norm": 27.00629425048828, "learning_rate": 9.175809472839175e-07, "loss": 0.4842, "step": 616 }, { "epoch": 0.08255284987958256, "grad_norm": 11.65235710144043, "learning_rate": 9.174471501204174e-07, "loss": 0.4931, "step": 617 }, { "epoch": 0.08268664704308269, "grad_norm": 25.956134796142578, "learning_rate": 9.173133529569173e-07, "loss": 0.6445, "step": 618 }, { "epoch": 0.08282044420658283, "grad_norm": 15.30692195892334, "learning_rate": 9.171795557934171e-07, "loss": 0.3657, "step": 619 }, { "epoch": 0.08295424137008295, "grad_norm": 12.894356727600098, "learning_rate": 9.17045758629917e-07, "loss": 0.467, "step": 620 }, { "epoch": 0.08308803853358308, "grad_norm": 38.20878601074219, "learning_rate": 9.169119614664169e-07, "loss": 0.5219, "step": 621 }, { "epoch": 0.08322183569708322, "grad_norm": 13.122739791870117, "learning_rate": 9.167781643029168e-07, "loss": 0.4457, "step": 622 }, { "epoch": 0.08335563286058335, "grad_norm": 22.853092193603516, "learning_rate": 9.166443671394166e-07, "loss": 0.4946, "step": 623 }, { "epoch": 0.08348943002408349, "grad_norm": 30.003660202026367, "learning_rate": 9.165105699759165e-07, "loss": 0.3515, "step": 624 }, { "epoch": 0.08362322718758362, "grad_norm": 17.104806900024414, "learning_rate": 9.163767728124163e-07, "loss": 0.5109, "step": 625 }, { "epoch": 0.08375702435108376, "grad_norm": 63.34914779663086, "learning_rate": 9.162429756489161e-07, "loss": 0.7866, "step": 626 }, { "epoch": 0.0838908215145839, "grad_norm": 15.33239459991455, "learning_rate": 9.16109178485416e-07, "loss": 0.4729, "step": 627 }, { "epoch": 0.08402461867808403, "grad_norm": 26.375347137451172, "learning_rate": 9.15975381321916e-07, "loss": 0.4966, "step": 628 }, { "epoch": 0.08415841584158416, "grad_norm": 13.712594985961914, "learning_rate": 9.158415841584159e-07, "loss": 0.4426, "step": 629 }, { "epoch": 0.0842922130050843, "grad_norm": 15.69259262084961, "learning_rate": 9.157077869949156e-07, "loss": 0.5109, "step": 630 }, { "epoch": 0.08442601016858442, "grad_norm": 39.87456130981445, "learning_rate": 9.155739898314155e-07, "loss": 0.3298, "step": 631 }, { "epoch": 0.08455980733208456, "grad_norm": 19.17775535583496, "learning_rate": 9.154401926679154e-07, "loss": 0.5042, "step": 632 }, { "epoch": 0.08469360449558469, "grad_norm": 10.17468547821045, "learning_rate": 9.153063955044153e-07, "loss": 0.3878, "step": 633 }, { "epoch": 0.08482740165908483, "grad_norm": 12.40682601928711, "learning_rate": 9.15172598340915e-07, "loss": 0.4779, "step": 634 }, { "epoch": 0.08496119882258496, "grad_norm": 17.292409896850586, "learning_rate": 9.150388011774149e-07, "loss": 0.5747, "step": 635 }, { "epoch": 0.0850949959860851, "grad_norm": 19.239177703857422, "learning_rate": 9.149050040139149e-07, "loss": 0.3498, "step": 636 }, { "epoch": 0.08522879314958523, "grad_norm": 14.483546257019043, "learning_rate": 9.147712068504148e-07, "loss": 0.5605, "step": 637 }, { "epoch": 0.08536259031308537, "grad_norm": 11.588457107543945, "learning_rate": 9.146374096869146e-07, "loss": 0.4678, "step": 638 }, { "epoch": 0.0854963874765855, "grad_norm": 11.239026069641113, "learning_rate": 9.145036125234144e-07, "loss": 0.4824, "step": 639 }, { "epoch": 0.08563018464008564, "grad_norm": 9.539493560791016, "learning_rate": 9.143698153599143e-07, "loss": 0.4101, "step": 640 }, { "epoch": 0.08576398180358577, "grad_norm": 24.842395782470703, "learning_rate": 9.142360181964142e-07, "loss": 0.5564, "step": 641 }, { "epoch": 0.08589777896708589, "grad_norm": 13.991931915283203, "learning_rate": 9.14102221032914e-07, "loss": 0.3848, "step": 642 }, { "epoch": 0.08603157613058603, "grad_norm": 15.22680950164795, "learning_rate": 9.13968423869414e-07, "loss": 0.5014, "step": 643 }, { "epoch": 0.08616537329408616, "grad_norm": 18.912763595581055, "learning_rate": 9.138346267059138e-07, "loss": 0.5734, "step": 644 }, { "epoch": 0.0862991704575863, "grad_norm": 13.563566207885742, "learning_rate": 9.137008295424137e-07, "loss": 0.5655, "step": 645 }, { "epoch": 0.08643296762108643, "grad_norm": 17.671382904052734, "learning_rate": 9.135670323789135e-07, "loss": 0.4458, "step": 646 }, { "epoch": 0.08656676478458657, "grad_norm": 15.4027738571167, "learning_rate": 9.134332352154134e-07, "loss": 0.4972, "step": 647 }, { "epoch": 0.0867005619480867, "grad_norm": 14.734734535217285, "learning_rate": 9.132994380519133e-07, "loss": 0.4164, "step": 648 }, { "epoch": 0.08683435911158684, "grad_norm": 22.292125701904297, "learning_rate": 9.131656408884131e-07, "loss": 0.5718, "step": 649 }, { "epoch": 0.08696815627508697, "grad_norm": 14.545339584350586, "learning_rate": 9.13031843724913e-07, "loss": 0.322, "step": 650 }, { "epoch": 0.0871019534385871, "grad_norm": 13.641697883605957, "learning_rate": 9.128980465614129e-07, "loss": 0.474, "step": 651 }, { "epoch": 0.08723575060208724, "grad_norm": 39.6561393737793, "learning_rate": 9.127642493979128e-07, "loss": 0.3996, "step": 652 }, { "epoch": 0.08736954776558736, "grad_norm": 23.713661193847656, "learning_rate": 9.126304522344126e-07, "loss": 0.4168, "step": 653 }, { "epoch": 0.0875033449290875, "grad_norm": 32.604881286621094, "learning_rate": 9.124966550709124e-07, "loss": 0.4017, "step": 654 }, { "epoch": 0.08763714209258763, "grad_norm": 34.27470779418945, "learning_rate": 9.123628579074123e-07, "loss": 0.5249, "step": 655 }, { "epoch": 0.08777093925608777, "grad_norm": 20.494800567626953, "learning_rate": 9.122290607439122e-07, "loss": 0.4411, "step": 656 }, { "epoch": 0.0879047364195879, "grad_norm": 22.189308166503906, "learning_rate": 9.12095263580412e-07, "loss": 0.5632, "step": 657 }, { "epoch": 0.08803853358308804, "grad_norm": 33.52397155761719, "learning_rate": 9.119614664169119e-07, "loss": 0.8286, "step": 658 }, { "epoch": 0.08817233074658817, "grad_norm": 11.426200866699219, "learning_rate": 9.118276692534118e-07, "loss": 0.2826, "step": 659 }, { "epoch": 0.08830612791008831, "grad_norm": 29.66457176208496, "learning_rate": 9.116938720899117e-07, "loss": 0.621, "step": 660 }, { "epoch": 0.08843992507358844, "grad_norm": 36.57786178588867, "learning_rate": 9.115600749264115e-07, "loss": 0.6063, "step": 661 }, { "epoch": 0.08857372223708858, "grad_norm": 9.970532417297363, "learning_rate": 9.114262777629113e-07, "loss": 0.4228, "step": 662 }, { "epoch": 0.08870751940058871, "grad_norm": 19.843305587768555, "learning_rate": 9.112924805994112e-07, "loss": 0.7037, "step": 663 }, { "epoch": 0.08884131656408883, "grad_norm": 16.533082962036133, "learning_rate": 9.111586834359111e-07, "loss": 0.4064, "step": 664 }, { "epoch": 0.08897511372758897, "grad_norm": 19.15291404724121, "learning_rate": 9.11024886272411e-07, "loss": 0.5383, "step": 665 }, { "epoch": 0.0891089108910891, "grad_norm": 13.780901908874512, "learning_rate": 9.108910891089109e-07, "loss": 0.496, "step": 666 }, { "epoch": 0.08924270805458924, "grad_norm": 34.072940826416016, "learning_rate": 9.107572919454107e-07, "loss": 0.4874, "step": 667 }, { "epoch": 0.08937650521808937, "grad_norm": 39.832794189453125, "learning_rate": 9.106234947819106e-07, "loss": 0.4586, "step": 668 }, { "epoch": 0.08951030238158951, "grad_norm": 51.35053634643555, "learning_rate": 9.104896976184104e-07, "loss": 0.6055, "step": 669 }, { "epoch": 0.08964409954508964, "grad_norm": 24.91864013671875, "learning_rate": 9.103559004549103e-07, "loss": 0.5646, "step": 670 }, { "epoch": 0.08977789670858978, "grad_norm": 9.679680824279785, "learning_rate": 9.102221032914102e-07, "loss": 0.6255, "step": 671 }, { "epoch": 0.08991169387208992, "grad_norm": 21.19985008239746, "learning_rate": 9.100883061279101e-07, "loss": 0.4432, "step": 672 }, { "epoch": 0.09004549103559005, "grad_norm": 37.0625114440918, "learning_rate": 9.099545089644099e-07, "loss": 0.4918, "step": 673 }, { "epoch": 0.09017928819909019, "grad_norm": 27.463720321655273, "learning_rate": 9.098207118009098e-07, "loss": 0.4147, "step": 674 }, { "epoch": 0.0903130853625903, "grad_norm": 22.91330909729004, "learning_rate": 9.096869146374097e-07, "loss": 0.4449, "step": 675 }, { "epoch": 0.09044688252609044, "grad_norm": 19.888486862182617, "learning_rate": 9.095531174739096e-07, "loss": 0.5309, "step": 676 }, { "epoch": 0.09058067968959058, "grad_norm": 25.40857696533203, "learning_rate": 9.094193203104093e-07, "loss": 0.2949, "step": 677 }, { "epoch": 0.09071447685309071, "grad_norm": 9.291589736938477, "learning_rate": 9.092855231469092e-07, "loss": 0.5784, "step": 678 }, { "epoch": 0.09084827401659085, "grad_norm": 7.800741672515869, "learning_rate": 9.091517259834091e-07, "loss": 0.4401, "step": 679 }, { "epoch": 0.09098207118009098, "grad_norm": 12.502025604248047, "learning_rate": 9.090179288199091e-07, "loss": 0.3482, "step": 680 }, { "epoch": 0.09111586834359112, "grad_norm": 36.11296844482422, "learning_rate": 9.088841316564088e-07, "loss": 0.576, "step": 681 }, { "epoch": 0.09124966550709125, "grad_norm": 29.751001358032227, "learning_rate": 9.087503344929087e-07, "loss": 0.6388, "step": 682 }, { "epoch": 0.09138346267059139, "grad_norm": 7.644384860992432, "learning_rate": 9.086165373294086e-07, "loss": 0.3451, "step": 683 }, { "epoch": 0.09151725983409152, "grad_norm": 7.066762924194336, "learning_rate": 9.084827401659084e-07, "loss": 0.4498, "step": 684 }, { "epoch": 0.09165105699759166, "grad_norm": 17.792682647705078, "learning_rate": 9.083489430024083e-07, "loss": 0.4727, "step": 685 }, { "epoch": 0.09178485416109179, "grad_norm": 12.261168479919434, "learning_rate": 9.082151458389081e-07, "loss": 0.4779, "step": 686 }, { "epoch": 0.09191865132459191, "grad_norm": 8.690581321716309, "learning_rate": 9.080813486754081e-07, "loss": 0.4472, "step": 687 }, { "epoch": 0.09205244848809205, "grad_norm": 7.186450004577637, "learning_rate": 9.079475515119079e-07, "loss": 0.436, "step": 688 }, { "epoch": 0.09218624565159218, "grad_norm": 10.03033447265625, "learning_rate": 9.078137543484078e-07, "loss": 0.5115, "step": 689 }, { "epoch": 0.09232004281509232, "grad_norm": 30.722579956054688, "learning_rate": 9.076799571849077e-07, "loss": 0.2734, "step": 690 }, { "epoch": 0.09245383997859245, "grad_norm": 23.727291107177734, "learning_rate": 9.075461600214075e-07, "loss": 0.301, "step": 691 }, { "epoch": 0.09258763714209259, "grad_norm": 39.28633499145508, "learning_rate": 9.074123628579073e-07, "loss": 0.7769, "step": 692 }, { "epoch": 0.09272143430559272, "grad_norm": 8.673612594604492, "learning_rate": 9.072785656944072e-07, "loss": 0.4495, "step": 693 }, { "epoch": 0.09285523146909286, "grad_norm": 24.42963409423828, "learning_rate": 9.071447685309072e-07, "loss": 0.4324, "step": 694 }, { "epoch": 0.092989028632593, "grad_norm": 37.56135559082031, "learning_rate": 9.07010971367407e-07, "loss": 0.5118, "step": 695 }, { "epoch": 0.09312282579609313, "grad_norm": 7.158039093017578, "learning_rate": 9.068771742039068e-07, "loss": 0.3711, "step": 696 }, { "epoch": 0.09325662295959326, "grad_norm": 27.082849502563477, "learning_rate": 9.067433770404067e-07, "loss": 0.4987, "step": 697 }, { "epoch": 0.09339042012309338, "grad_norm": 23.052635192871094, "learning_rate": 9.066095798769066e-07, "loss": 0.5316, "step": 698 }, { "epoch": 0.09352421728659352, "grad_norm": 22.628032684326172, "learning_rate": 9.064757827134065e-07, "loss": 0.6864, "step": 699 }, { "epoch": 0.09365801445009365, "grad_norm": 12.442644119262695, "learning_rate": 9.063419855499062e-07, "loss": 0.4727, "step": 700 }, { "epoch": 0.09379181161359379, "grad_norm": 11.909784317016602, "learning_rate": 9.062081883864061e-07, "loss": 0.6442, "step": 701 }, { "epoch": 0.09392560877709392, "grad_norm": 11.078575134277344, "learning_rate": 9.060743912229061e-07, "loss": 0.5296, "step": 702 }, { "epoch": 0.09405940594059406, "grad_norm": 17.90765380859375, "learning_rate": 9.05940594059406e-07, "loss": 0.4594, "step": 703 }, { "epoch": 0.0941932031040942, "grad_norm": 26.539030075073242, "learning_rate": 9.058067968959057e-07, "loss": 0.4553, "step": 704 }, { "epoch": 0.09432700026759433, "grad_norm": 10.72231388092041, "learning_rate": 9.056729997324056e-07, "loss": 0.5432, "step": 705 }, { "epoch": 0.09446079743109446, "grad_norm": 8.968864440917969, "learning_rate": 9.055392025689055e-07, "loss": 0.5595, "step": 706 }, { "epoch": 0.0945945945945946, "grad_norm": 27.876413345336914, "learning_rate": 9.054054054054053e-07, "loss": 0.4503, "step": 707 }, { "epoch": 0.09472839175809473, "grad_norm": 21.583221435546875, "learning_rate": 9.052716082419052e-07, "loss": 0.3401, "step": 708 }, { "epoch": 0.09486218892159486, "grad_norm": 8.033329963684082, "learning_rate": 9.051378110784051e-07, "loss": 0.3836, "step": 709 }, { "epoch": 0.09499598608509499, "grad_norm": 7.947105407714844, "learning_rate": 9.05004013914905e-07, "loss": 0.4176, "step": 710 }, { "epoch": 0.09512978324859513, "grad_norm": 8.328703880310059, "learning_rate": 9.048702167514048e-07, "loss": 0.3866, "step": 711 }, { "epoch": 0.09526358041209526, "grad_norm": 17.855728149414062, "learning_rate": 9.047364195879047e-07, "loss": 0.5279, "step": 712 }, { "epoch": 0.0953973775755954, "grad_norm": 28.608291625976562, "learning_rate": 9.046026224244046e-07, "loss": 0.5391, "step": 713 }, { "epoch": 0.09553117473909553, "grad_norm": 49.18524169921875, "learning_rate": 9.044688252609044e-07, "loss": 0.7314, "step": 714 }, { "epoch": 0.09566497190259567, "grad_norm": 15.241083145141602, "learning_rate": 9.043350280974042e-07, "loss": 0.3956, "step": 715 }, { "epoch": 0.0957987690660958, "grad_norm": 27.740995407104492, "learning_rate": 9.042012309339042e-07, "loss": 0.6081, "step": 716 }, { "epoch": 0.09593256622959594, "grad_norm": 23.817476272583008, "learning_rate": 9.040674337704041e-07, "loss": 0.6297, "step": 717 }, { "epoch": 0.09606636339309607, "grad_norm": 24.339311599731445, "learning_rate": 9.03933636606904e-07, "loss": 0.376, "step": 718 }, { "epoch": 0.0962001605565962, "grad_norm": 16.934701919555664, "learning_rate": 9.037998394434037e-07, "loss": 0.5874, "step": 719 }, { "epoch": 0.09633395772009633, "grad_norm": 10.778347969055176, "learning_rate": 9.036660422799036e-07, "loss": 0.4151, "step": 720 }, { "epoch": 0.09646775488359646, "grad_norm": 24.535385131835938, "learning_rate": 9.035322451164035e-07, "loss": 0.4996, "step": 721 }, { "epoch": 0.0966015520470966, "grad_norm": 50.903568267822266, "learning_rate": 9.033984479529034e-07, "loss": 0.4855, "step": 722 }, { "epoch": 0.09673534921059673, "grad_norm": 12.143138885498047, "learning_rate": 9.032646507894031e-07, "loss": 0.5381, "step": 723 }, { "epoch": 0.09686914637409687, "grad_norm": 14.284943580627441, "learning_rate": 9.031308536259031e-07, "loss": 0.5819, "step": 724 }, { "epoch": 0.097002943537597, "grad_norm": 29.324094772338867, "learning_rate": 9.02997056462403e-07, "loss": 0.4398, "step": 725 }, { "epoch": 0.09713674070109714, "grad_norm": 52.156005859375, "learning_rate": 9.028632592989029e-07, "loss": 0.3008, "step": 726 }, { "epoch": 0.09727053786459727, "grad_norm": 13.819735527038574, "learning_rate": 9.027294621354027e-07, "loss": 0.4472, "step": 727 }, { "epoch": 0.09740433502809741, "grad_norm": 19.630250930786133, "learning_rate": 9.025956649719025e-07, "loss": 0.434, "step": 728 }, { "epoch": 0.09753813219159754, "grad_norm": 15.424079895019531, "learning_rate": 9.024618678084024e-07, "loss": 0.3984, "step": 729 }, { "epoch": 0.09767192935509768, "grad_norm": 48.98758316040039, "learning_rate": 9.023280706449022e-07, "loss": 0.7398, "step": 730 }, { "epoch": 0.0978057265185978, "grad_norm": 18.88811683654785, "learning_rate": 9.021942734814022e-07, "loss": 0.5447, "step": 731 }, { "epoch": 0.09793952368209793, "grad_norm": 31.95454978942871, "learning_rate": 9.02060476317902e-07, "loss": 0.4582, "step": 732 }, { "epoch": 0.09807332084559807, "grad_norm": 16.317214965820312, "learning_rate": 9.019266791544019e-07, "loss": 0.2363, "step": 733 }, { "epoch": 0.0982071180090982, "grad_norm": 40.92802810668945, "learning_rate": 9.017928819909017e-07, "loss": 0.5569, "step": 734 }, { "epoch": 0.09834091517259834, "grad_norm": 24.70380973815918, "learning_rate": 9.016590848274016e-07, "loss": 0.55, "step": 735 }, { "epoch": 0.09847471233609847, "grad_norm": 12.042128562927246, "learning_rate": 9.015252876639015e-07, "loss": 0.3489, "step": 736 }, { "epoch": 0.09860850949959861, "grad_norm": 15.674334526062012, "learning_rate": 9.013914905004013e-07, "loss": 0.5667, "step": 737 }, { "epoch": 0.09874230666309874, "grad_norm": 11.218344688415527, "learning_rate": 9.012576933369012e-07, "loss": 0.3473, "step": 738 }, { "epoch": 0.09887610382659888, "grad_norm": 23.220876693725586, "learning_rate": 9.011238961734011e-07, "loss": 0.5395, "step": 739 }, { "epoch": 0.09900990099009901, "grad_norm": 15.11190414428711, "learning_rate": 9.00990099009901e-07, "loss": 0.5538, "step": 740 }, { "epoch": 0.09914369815359915, "grad_norm": 25.811800003051758, "learning_rate": 9.008563018464009e-07, "loss": 0.4366, "step": 741 }, { "epoch": 0.09927749531709927, "grad_norm": 8.403963088989258, "learning_rate": 9.007225046829006e-07, "loss": 0.4455, "step": 742 }, { "epoch": 0.0994112924805994, "grad_norm": 12.835809707641602, "learning_rate": 9.005887075194005e-07, "loss": 0.541, "step": 743 }, { "epoch": 0.09954508964409954, "grad_norm": 28.170032501220703, "learning_rate": 9.004549103559004e-07, "loss": 0.3957, "step": 744 }, { "epoch": 0.09967888680759968, "grad_norm": 12.099746704101562, "learning_rate": 9.003211131924003e-07, "loss": 0.3387, "step": 745 }, { "epoch": 0.09981268397109981, "grad_norm": 32.1656494140625, "learning_rate": 9.001873160289001e-07, "loss": 0.4645, "step": 746 }, { "epoch": 0.09994648113459995, "grad_norm": 70.03046417236328, "learning_rate": 9.000535188654e-07, "loss": 0.4848, "step": 747 }, { "epoch": 0.10008027829810008, "grad_norm": 33.21613311767578, "learning_rate": 8.999197217018999e-07, "loss": 0.4655, "step": 748 }, { "epoch": 0.10021407546160022, "grad_norm": 19.905366897583008, "learning_rate": 8.997859245383998e-07, "loss": 0.3081, "step": 749 }, { "epoch": 0.10034787262510035, "grad_norm": 21.31903076171875, "learning_rate": 8.996521273748996e-07, "loss": 0.4487, "step": 750 }, { "epoch": 0.10048166978860049, "grad_norm": 16.4615535736084, "learning_rate": 8.995183302113994e-07, "loss": 0.4695, "step": 751 }, { "epoch": 0.10061546695210062, "grad_norm": 18.243915557861328, "learning_rate": 8.993845330478993e-07, "loss": 0.3641, "step": 752 }, { "epoch": 0.10074926411560074, "grad_norm": 22.250038146972656, "learning_rate": 8.992507358843993e-07, "loss": 0.7583, "step": 753 }, { "epoch": 0.10088306127910088, "grad_norm": 25.206314086914062, "learning_rate": 8.991169387208991e-07, "loss": 0.7735, "step": 754 }, { "epoch": 0.10101685844260101, "grad_norm": 33.147254943847656, "learning_rate": 8.98983141557399e-07, "loss": 0.3314, "step": 755 }, { "epoch": 0.10115065560610115, "grad_norm": 14.080107688903809, "learning_rate": 8.988493443938988e-07, "loss": 0.2652, "step": 756 }, { "epoch": 0.10128445276960128, "grad_norm": 17.81605339050293, "learning_rate": 8.987155472303987e-07, "loss": 0.6016, "step": 757 }, { "epoch": 0.10141824993310142, "grad_norm": 37.5146598815918, "learning_rate": 8.985817500668985e-07, "loss": 0.7445, "step": 758 }, { "epoch": 0.10155204709660155, "grad_norm": 15.712850570678711, "learning_rate": 8.984479529033984e-07, "loss": 0.3309, "step": 759 }, { "epoch": 0.10168584426010169, "grad_norm": 27.27952003479004, "learning_rate": 8.983141557398984e-07, "loss": 0.4804, "step": 760 }, { "epoch": 0.10181964142360182, "grad_norm": 18.630985260009766, "learning_rate": 8.981803585763981e-07, "loss": 0.4407, "step": 761 }, { "epoch": 0.10195343858710196, "grad_norm": 31.261587142944336, "learning_rate": 8.98046561412898e-07, "loss": 0.4658, "step": 762 }, { "epoch": 0.10208723575060209, "grad_norm": 12.010231018066406, "learning_rate": 8.979127642493979e-07, "loss": 0.4312, "step": 763 }, { "epoch": 0.10222103291410223, "grad_norm": 14.353791236877441, "learning_rate": 8.977789670858978e-07, "loss": 0.4124, "step": 764 }, { "epoch": 0.10235483007760235, "grad_norm": 7.192036151885986, "learning_rate": 8.976451699223975e-07, "loss": 0.4297, "step": 765 }, { "epoch": 0.10248862724110248, "grad_norm": 30.181739807128906, "learning_rate": 8.975113727588974e-07, "loss": 0.3512, "step": 766 }, { "epoch": 0.10262242440460262, "grad_norm": 25.776287078857422, "learning_rate": 8.973775755953973e-07, "loss": 0.5106, "step": 767 }, { "epoch": 0.10275622156810275, "grad_norm": 7.36591100692749, "learning_rate": 8.972437784318973e-07, "loss": 0.5163, "step": 768 }, { "epoch": 0.10289001873160289, "grad_norm": 11.577953338623047, "learning_rate": 8.971099812683971e-07, "loss": 0.473, "step": 769 }, { "epoch": 0.10302381589510302, "grad_norm": 11.455196380615234, "learning_rate": 8.969761841048969e-07, "loss": 0.5604, "step": 770 }, { "epoch": 0.10315761305860316, "grad_norm": 10.775403022766113, "learning_rate": 8.968423869413968e-07, "loss": 0.537, "step": 771 }, { "epoch": 0.1032914102221033, "grad_norm": 24.594057083129883, "learning_rate": 8.967085897778967e-07, "loss": 0.4379, "step": 772 }, { "epoch": 0.10342520738560343, "grad_norm": 14.65210247039795, "learning_rate": 8.965747926143965e-07, "loss": 0.4259, "step": 773 }, { "epoch": 0.10355900454910356, "grad_norm": 30.83063316345215, "learning_rate": 8.964409954508963e-07, "loss": 0.5024, "step": 774 }, { "epoch": 0.1036928017126037, "grad_norm": 31.640966415405273, "learning_rate": 8.963071982873963e-07, "loss": 0.4512, "step": 775 }, { "epoch": 0.10382659887610382, "grad_norm": 35.152828216552734, "learning_rate": 8.961734011238962e-07, "loss": 0.69, "step": 776 }, { "epoch": 0.10396039603960396, "grad_norm": 21.665678024291992, "learning_rate": 8.96039603960396e-07, "loss": 0.433, "step": 777 }, { "epoch": 0.10409419320310409, "grad_norm": 12.191967964172363, "learning_rate": 8.959058067968959e-07, "loss": 0.4341, "step": 778 }, { "epoch": 0.10422799036660423, "grad_norm": 17.024023056030273, "learning_rate": 8.957720096333957e-07, "loss": 0.5228, "step": 779 }, { "epoch": 0.10436178753010436, "grad_norm": 10.481995582580566, "learning_rate": 8.956382124698956e-07, "loss": 0.3958, "step": 780 }, { "epoch": 0.1044955846936045, "grad_norm": 20.052507400512695, "learning_rate": 8.955044153063954e-07, "loss": 0.2498, "step": 781 }, { "epoch": 0.10462938185710463, "grad_norm": 21.397798538208008, "learning_rate": 8.953706181428954e-07, "loss": 0.5441, "step": 782 }, { "epoch": 0.10476317902060477, "grad_norm": 13.760491371154785, "learning_rate": 8.952368209793953e-07, "loss": 0.5102, "step": 783 }, { "epoch": 0.1048969761841049, "grad_norm": 10.294989585876465, "learning_rate": 8.951030238158951e-07, "loss": 0.4714, "step": 784 }, { "epoch": 0.10503077334760504, "grad_norm": 25.26814079284668, "learning_rate": 8.949692266523949e-07, "loss": 0.5457, "step": 785 }, { "epoch": 0.10516457051110517, "grad_norm": 29.864837646484375, "learning_rate": 8.948354294888948e-07, "loss": 0.5478, "step": 786 }, { "epoch": 0.10529836767460529, "grad_norm": 16.881027221679688, "learning_rate": 8.947016323253947e-07, "loss": 0.3647, "step": 787 }, { "epoch": 0.10543216483810543, "grad_norm": 11.91662883758545, "learning_rate": 8.945678351618944e-07, "loss": 0.5905, "step": 788 }, { "epoch": 0.10556596200160556, "grad_norm": 33.010196685791016, "learning_rate": 8.944340379983943e-07, "loss": 0.6288, "step": 789 }, { "epoch": 0.1056997591651057, "grad_norm": 20.19643783569336, "learning_rate": 8.943002408348943e-07, "loss": 0.4766, "step": 790 }, { "epoch": 0.10583355632860583, "grad_norm": 18.936431884765625, "learning_rate": 8.941664436713942e-07, "loss": 0.4632, "step": 791 }, { "epoch": 0.10596735349210597, "grad_norm": 20.155702590942383, "learning_rate": 8.94032646507894e-07, "loss": 0.6439, "step": 792 }, { "epoch": 0.1061011506556061, "grad_norm": 13.606155395507812, "learning_rate": 8.938988493443938e-07, "loss": 0.4628, "step": 793 }, { "epoch": 0.10623494781910624, "grad_norm": 23.88871955871582, "learning_rate": 8.937650521808937e-07, "loss": 0.5346, "step": 794 }, { "epoch": 0.10636874498260637, "grad_norm": 9.215771675109863, "learning_rate": 8.936312550173936e-07, "loss": 0.4929, "step": 795 }, { "epoch": 0.10650254214610651, "grad_norm": 22.666231155395508, "learning_rate": 8.934974578538934e-07, "loss": 0.3415, "step": 796 }, { "epoch": 0.10663633930960664, "grad_norm": 16.323339462280273, "learning_rate": 8.933636606903934e-07, "loss": 0.5302, "step": 797 }, { "epoch": 0.10677013647310676, "grad_norm": 18.204084396362305, "learning_rate": 8.932298635268932e-07, "loss": 0.386, "step": 798 }, { "epoch": 0.1069039336366069, "grad_norm": 12.31531810760498, "learning_rate": 8.930960663633931e-07, "loss": 0.659, "step": 799 }, { "epoch": 0.10703773080010703, "grad_norm": 22.47266387939453, "learning_rate": 8.929622691998929e-07, "loss": 0.3134, "step": 800 }, { "epoch": 0.10717152796360717, "grad_norm": 11.453672409057617, "learning_rate": 8.928284720363928e-07, "loss": 0.4964, "step": 801 }, { "epoch": 0.1073053251271073, "grad_norm": 13.700913429260254, "learning_rate": 8.926946748728926e-07, "loss": 0.4269, "step": 802 }, { "epoch": 0.10743912229060744, "grad_norm": 16.925214767456055, "learning_rate": 8.925608777093925e-07, "loss": 0.4998, "step": 803 }, { "epoch": 0.10757291945410757, "grad_norm": 11.063761711120605, "learning_rate": 8.924270805458924e-07, "loss": 0.3944, "step": 804 }, { "epoch": 0.10770671661760771, "grad_norm": 11.731231689453125, "learning_rate": 8.922932833823923e-07, "loss": 0.2553, "step": 805 }, { "epoch": 0.10784051378110784, "grad_norm": 42.3464241027832, "learning_rate": 8.921594862188922e-07, "loss": 0.5971, "step": 806 }, { "epoch": 0.10797431094460798, "grad_norm": 24.84994125366211, "learning_rate": 8.92025689055392e-07, "loss": 0.2669, "step": 807 }, { "epoch": 0.10810810810810811, "grad_norm": 38.671627044677734, "learning_rate": 8.918918918918918e-07, "loss": 0.6808, "step": 808 }, { "epoch": 0.10824190527160824, "grad_norm": 10.725581169128418, "learning_rate": 8.917580947283917e-07, "loss": 0.4866, "step": 809 }, { "epoch": 0.10837570243510837, "grad_norm": 9.029829025268555, "learning_rate": 8.916242975648916e-07, "loss": 0.3431, "step": 810 }, { "epoch": 0.1085094995986085, "grad_norm": 13.313252449035645, "learning_rate": 8.914905004013913e-07, "loss": 0.4637, "step": 811 }, { "epoch": 0.10864329676210864, "grad_norm": 14.66236686706543, "learning_rate": 8.913567032378913e-07, "loss": 0.5275, "step": 812 }, { "epoch": 0.10877709392560878, "grad_norm": 29.26650619506836, "learning_rate": 8.912229060743912e-07, "loss": 0.7235, "step": 813 }, { "epoch": 0.10891089108910891, "grad_norm": 50.56870651245117, "learning_rate": 8.910891089108911e-07, "loss": 0.6583, "step": 814 }, { "epoch": 0.10904468825260905, "grad_norm": 16.649009704589844, "learning_rate": 8.909553117473909e-07, "loss": 0.5096, "step": 815 }, { "epoch": 0.10917848541610918, "grad_norm": 24.49722671508789, "learning_rate": 8.908215145838907e-07, "loss": 0.5664, "step": 816 }, { "epoch": 0.10931228257960932, "grad_norm": 12.531848907470703, "learning_rate": 8.906877174203906e-07, "loss": 0.5525, "step": 817 }, { "epoch": 0.10944607974310945, "grad_norm": 32.676395416259766, "learning_rate": 8.905539202568905e-07, "loss": 0.3813, "step": 818 }, { "epoch": 0.10957987690660959, "grad_norm": 12.870556831359863, "learning_rate": 8.904201230933904e-07, "loss": 0.5447, "step": 819 }, { "epoch": 0.1097136740701097, "grad_norm": 32.34736251831055, "learning_rate": 8.902863259298903e-07, "loss": 0.3759, "step": 820 }, { "epoch": 0.10984747123360984, "grad_norm": 20.7248477935791, "learning_rate": 8.901525287663901e-07, "loss": 0.4178, "step": 821 }, { "epoch": 0.10998126839710998, "grad_norm": 9.585858345031738, "learning_rate": 8.9001873160289e-07, "loss": 0.4593, "step": 822 }, { "epoch": 0.11011506556061011, "grad_norm": 18.138399124145508, "learning_rate": 8.898849344393898e-07, "loss": 0.3429, "step": 823 }, { "epoch": 0.11024886272411025, "grad_norm": 8.800107955932617, "learning_rate": 8.897511372758897e-07, "loss": 0.3675, "step": 824 }, { "epoch": 0.11038265988761038, "grad_norm": 31.159887313842773, "learning_rate": 8.896173401123896e-07, "loss": 0.578, "step": 825 }, { "epoch": 0.11051645705111052, "grad_norm": 5.751346111297607, "learning_rate": 8.894835429488895e-07, "loss": 0.3468, "step": 826 }, { "epoch": 0.11065025421461065, "grad_norm": 22.50516700744629, "learning_rate": 8.893497457853893e-07, "loss": 0.5124, "step": 827 }, { "epoch": 0.11078405137811079, "grad_norm": 7.556909561157227, "learning_rate": 8.892159486218892e-07, "loss": 0.3234, "step": 828 }, { "epoch": 0.11091784854161092, "grad_norm": 39.612483978271484, "learning_rate": 8.890821514583891e-07, "loss": 0.6278, "step": 829 }, { "epoch": 0.11105164570511106, "grad_norm": 19.814714431762695, "learning_rate": 8.88948354294889e-07, "loss": 0.5297, "step": 830 }, { "epoch": 0.11118544286861119, "grad_norm": 9.982979774475098, "learning_rate": 8.888145571313887e-07, "loss": 0.3953, "step": 831 }, { "epoch": 0.11131924003211131, "grad_norm": 13.36428165435791, "learning_rate": 8.886807599678886e-07, "loss": 0.529, "step": 832 }, { "epoch": 0.11145303719561145, "grad_norm": 10.580708503723145, "learning_rate": 8.885469628043885e-07, "loss": 0.38, "step": 833 }, { "epoch": 0.11158683435911158, "grad_norm": 24.574657440185547, "learning_rate": 8.884131656408885e-07, "loss": 0.411, "step": 834 }, { "epoch": 0.11172063152261172, "grad_norm": 12.533242225646973, "learning_rate": 8.882793684773882e-07, "loss": 0.4395, "step": 835 }, { "epoch": 0.11185442868611185, "grad_norm": 12.128975868225098, "learning_rate": 8.881455713138881e-07, "loss": 0.5242, "step": 836 }, { "epoch": 0.11198822584961199, "grad_norm": 8.66299819946289, "learning_rate": 8.88011774150388e-07, "loss": 0.4322, "step": 837 }, { "epoch": 0.11212202301311212, "grad_norm": 22.39251136779785, "learning_rate": 8.878779769868878e-07, "loss": 0.5904, "step": 838 }, { "epoch": 0.11225582017661226, "grad_norm": 17.978452682495117, "learning_rate": 8.877441798233877e-07, "loss": 0.42, "step": 839 }, { "epoch": 0.1123896173401124, "grad_norm": 11.041504859924316, "learning_rate": 8.876103826598875e-07, "loss": 0.4378, "step": 840 }, { "epoch": 0.11252341450361253, "grad_norm": 9.005702018737793, "learning_rate": 8.874765854963875e-07, "loss": 0.4487, "step": 841 }, { "epoch": 0.11265721166711266, "grad_norm": 40.194305419921875, "learning_rate": 8.873427883328873e-07, "loss": 0.2944, "step": 842 }, { "epoch": 0.11279100883061279, "grad_norm": 11.456842422485352, "learning_rate": 8.872089911693872e-07, "loss": 0.4534, "step": 843 }, { "epoch": 0.11292480599411292, "grad_norm": 22.931297302246094, "learning_rate": 8.87075194005887e-07, "loss": 0.3329, "step": 844 }, { "epoch": 0.11305860315761306, "grad_norm": 31.3056697845459, "learning_rate": 8.869413968423869e-07, "loss": 0.7641, "step": 845 }, { "epoch": 0.11319240032111319, "grad_norm": 14.28117847442627, "learning_rate": 8.868075996788867e-07, "loss": 0.5808, "step": 846 }, { "epoch": 0.11332619748461333, "grad_norm": 27.731548309326172, "learning_rate": 8.866738025153866e-07, "loss": 0.3572, "step": 847 }, { "epoch": 0.11345999464811346, "grad_norm": 8.655011177062988, "learning_rate": 8.865400053518866e-07, "loss": 0.5136, "step": 848 }, { "epoch": 0.1135937918116136, "grad_norm": 13.224489212036133, "learning_rate": 8.864062081883864e-07, "loss": 0.2698, "step": 849 }, { "epoch": 0.11372758897511373, "grad_norm": 16.410627365112305, "learning_rate": 8.862724110248862e-07, "loss": 0.4456, "step": 850 }, { "epoch": 0.11386138613861387, "grad_norm": 32.99456024169922, "learning_rate": 8.861386138613861e-07, "loss": 0.8005, "step": 851 }, { "epoch": 0.113995183302114, "grad_norm": 20.271902084350586, "learning_rate": 8.86004816697886e-07, "loss": 0.5153, "step": 852 }, { "epoch": 0.11412898046561414, "grad_norm": 9.961291313171387, "learning_rate": 8.858710195343859e-07, "loss": 0.3998, "step": 853 }, { "epoch": 0.11426277762911426, "grad_norm": 18.67681312561035, "learning_rate": 8.857372223708856e-07, "loss": 0.5013, "step": 854 }, { "epoch": 0.11439657479261439, "grad_norm": 12.6431303024292, "learning_rate": 8.856034252073855e-07, "loss": 0.4262, "step": 855 }, { "epoch": 0.11453037195611453, "grad_norm": 14.195055961608887, "learning_rate": 8.854696280438855e-07, "loss": 0.5638, "step": 856 }, { "epoch": 0.11466416911961466, "grad_norm": 16.536895751953125, "learning_rate": 8.853358308803854e-07, "loss": 0.5373, "step": 857 }, { "epoch": 0.1147979662831148, "grad_norm": 9.53983211517334, "learning_rate": 8.852020337168851e-07, "loss": 0.4645, "step": 858 }, { "epoch": 0.11493176344661493, "grad_norm": 24.389205932617188, "learning_rate": 8.85068236553385e-07, "loss": 0.5083, "step": 859 }, { "epoch": 0.11506556061011507, "grad_norm": 7.515247344970703, "learning_rate": 8.849344393898849e-07, "loss": 0.4473, "step": 860 }, { "epoch": 0.1151993577736152, "grad_norm": 13.269707679748535, "learning_rate": 8.848006422263847e-07, "loss": 0.5511, "step": 861 }, { "epoch": 0.11533315493711534, "grad_norm": 25.708908081054688, "learning_rate": 8.846668450628846e-07, "loss": 0.6849, "step": 862 }, { "epoch": 0.11546695210061547, "grad_norm": 10.085738182067871, "learning_rate": 8.845330478993845e-07, "loss": 0.4765, "step": 863 }, { "epoch": 0.11560074926411561, "grad_norm": 20.053102493286133, "learning_rate": 8.843992507358844e-07, "loss": 0.5199, "step": 864 }, { "epoch": 0.11573454642761573, "grad_norm": 23.958457946777344, "learning_rate": 8.842654535723842e-07, "loss": 0.4112, "step": 865 }, { "epoch": 0.11586834359111586, "grad_norm": 16.92732048034668, "learning_rate": 8.841316564088841e-07, "loss": 0.512, "step": 866 }, { "epoch": 0.116002140754616, "grad_norm": 13.155503273010254, "learning_rate": 8.83997859245384e-07, "loss": 0.5665, "step": 867 }, { "epoch": 0.11613593791811613, "grad_norm": 9.862957954406738, "learning_rate": 8.838640620818838e-07, "loss": 0.4227, "step": 868 }, { "epoch": 0.11626973508161627, "grad_norm": 15.3148193359375, "learning_rate": 8.837302649183836e-07, "loss": 0.5649, "step": 869 }, { "epoch": 0.1164035322451164, "grad_norm": 32.83000564575195, "learning_rate": 8.835964677548835e-07, "loss": 0.3671, "step": 870 }, { "epoch": 0.11653732940861654, "grad_norm": 13.505109786987305, "learning_rate": 8.834626705913835e-07, "loss": 0.5349, "step": 871 }, { "epoch": 0.11667112657211667, "grad_norm": 25.422809600830078, "learning_rate": 8.833288734278833e-07, "loss": 0.5545, "step": 872 }, { "epoch": 0.11680492373561681, "grad_norm": 10.950857162475586, "learning_rate": 8.831950762643831e-07, "loss": 0.4232, "step": 873 }, { "epoch": 0.11693872089911694, "grad_norm": 8.161943435668945, "learning_rate": 8.83061279100883e-07, "loss": 0.3974, "step": 874 }, { "epoch": 0.11707251806261708, "grad_norm": 12.813623428344727, "learning_rate": 8.829274819373829e-07, "loss": 0.4684, "step": 875 }, { "epoch": 0.1172063152261172, "grad_norm": 32.276947021484375, "learning_rate": 8.827936847738828e-07, "loss": 0.549, "step": 876 }, { "epoch": 0.11734011238961733, "grad_norm": 14.642736434936523, "learning_rate": 8.826598876103825e-07, "loss": 0.4153, "step": 877 }, { "epoch": 0.11747390955311747, "grad_norm": 16.563629150390625, "learning_rate": 8.825260904468825e-07, "loss": 0.4076, "step": 878 }, { "epoch": 0.1176077067166176, "grad_norm": 16.34657096862793, "learning_rate": 8.823922932833824e-07, "loss": 0.4405, "step": 879 }, { "epoch": 0.11774150388011774, "grad_norm": 13.314958572387695, "learning_rate": 8.822584961198823e-07, "loss": 0.3231, "step": 880 }, { "epoch": 0.11787530104361787, "grad_norm": 26.295183181762695, "learning_rate": 8.82124698956382e-07, "loss": 0.6045, "step": 881 }, { "epoch": 0.11800909820711801, "grad_norm": 14.524567604064941, "learning_rate": 8.819909017928819e-07, "loss": 0.5556, "step": 882 }, { "epoch": 0.11814289537061815, "grad_norm": 11.957655906677246, "learning_rate": 8.818571046293818e-07, "loss": 0.5787, "step": 883 }, { "epoch": 0.11827669253411828, "grad_norm": 24.40470314025879, "learning_rate": 8.817233074658817e-07, "loss": 0.3576, "step": 884 }, { "epoch": 0.11841048969761842, "grad_norm": 18.907499313354492, "learning_rate": 8.815895103023816e-07, "loss": 0.3415, "step": 885 }, { "epoch": 0.11854428686111855, "grad_norm": 8.39202880859375, "learning_rate": 8.814557131388814e-07, "loss": 0.3923, "step": 886 }, { "epoch": 0.11867808402461867, "grad_norm": 8.302163124084473, "learning_rate": 8.813219159753813e-07, "loss": 0.4331, "step": 887 }, { "epoch": 0.1188118811881188, "grad_norm": 36.47541046142578, "learning_rate": 8.811881188118812e-07, "loss": 0.6711, "step": 888 }, { "epoch": 0.11894567835161894, "grad_norm": 10.672859191894531, "learning_rate": 8.81054321648381e-07, "loss": 0.5026, "step": 889 }, { "epoch": 0.11907947551511908, "grad_norm": 8.246297836303711, "learning_rate": 8.809205244848809e-07, "loss": 0.4119, "step": 890 }, { "epoch": 0.11921327267861921, "grad_norm": 13.752643585205078, "learning_rate": 8.807867273213807e-07, "loss": 0.5451, "step": 891 }, { "epoch": 0.11934706984211935, "grad_norm": 24.74436378479004, "learning_rate": 8.806529301578805e-07, "loss": 0.3415, "step": 892 }, { "epoch": 0.11948086700561948, "grad_norm": 29.482648849487305, "learning_rate": 8.805191329943805e-07, "loss": 0.3018, "step": 893 }, { "epoch": 0.11961466416911962, "grad_norm": 10.951689720153809, "learning_rate": 8.803853358308804e-07, "loss": 0.4846, "step": 894 }, { "epoch": 0.11974846133261975, "grad_norm": 23.743406295776367, "learning_rate": 8.802515386673803e-07, "loss": 0.5756, "step": 895 }, { "epoch": 0.11988225849611989, "grad_norm": 22.08819580078125, "learning_rate": 8.8011774150388e-07, "loss": 0.2635, "step": 896 }, { "epoch": 0.12001605565962002, "grad_norm": 17.475814819335938, "learning_rate": 8.799839443403799e-07, "loss": 0.6853, "step": 897 }, { "epoch": 0.12014985282312014, "grad_norm": 15.863889694213867, "learning_rate": 8.798501471768798e-07, "loss": 0.5509, "step": 898 }, { "epoch": 0.12028364998662028, "grad_norm": 10.523943901062012, "learning_rate": 8.797163500133797e-07, "loss": 0.5195, "step": 899 }, { "epoch": 0.12041744715012041, "grad_norm": 13.007387161254883, "learning_rate": 8.795825528498795e-07, "loss": 0.4941, "step": 900 }, { "epoch": 0.12055124431362055, "grad_norm": 18.050809860229492, "learning_rate": 8.794487556863794e-07, "loss": 0.4053, "step": 901 }, { "epoch": 0.12068504147712068, "grad_norm": 14.604314804077148, "learning_rate": 8.793149585228793e-07, "loss": 0.4512, "step": 902 }, { "epoch": 0.12081883864062082, "grad_norm": 29.993791580200195, "learning_rate": 8.791811613593792e-07, "loss": 0.6932, "step": 903 }, { "epoch": 0.12095263580412095, "grad_norm": 19.54190444946289, "learning_rate": 8.79047364195879e-07, "loss": 0.631, "step": 904 }, { "epoch": 0.12108643296762109, "grad_norm": 24.36077308654785, "learning_rate": 8.789135670323788e-07, "loss": 0.31, "step": 905 }, { "epoch": 0.12122023013112122, "grad_norm": 11.381570816040039, "learning_rate": 8.787797698688787e-07, "loss": 0.5264, "step": 906 }, { "epoch": 0.12135402729462136, "grad_norm": 11.581469535827637, "learning_rate": 8.786459727053787e-07, "loss": 0.3993, "step": 907 }, { "epoch": 0.1214878244581215, "grad_norm": 10.610539436340332, "learning_rate": 8.785121755418785e-07, "loss": 0.5097, "step": 908 }, { "epoch": 0.12162162162162163, "grad_norm": 12.639386177062988, "learning_rate": 8.783783783783784e-07, "loss": 0.5382, "step": 909 }, { "epoch": 0.12175541878512175, "grad_norm": 11.809029579162598, "learning_rate": 8.782445812148782e-07, "loss": 0.4034, "step": 910 }, { "epoch": 0.12188921594862188, "grad_norm": 30.465259552001953, "learning_rate": 8.781107840513781e-07, "loss": 0.5635, "step": 911 }, { "epoch": 0.12202301311212202, "grad_norm": 13.445860862731934, "learning_rate": 8.779769868878779e-07, "loss": 0.3948, "step": 912 }, { "epoch": 0.12215681027562215, "grad_norm": 6.216826438903809, "learning_rate": 8.778431897243778e-07, "loss": 0.4759, "step": 913 }, { "epoch": 0.12229060743912229, "grad_norm": 15.455144882202148, "learning_rate": 8.777093925608776e-07, "loss": 0.3508, "step": 914 }, { "epoch": 0.12242440460262242, "grad_norm": 21.092327117919922, "learning_rate": 8.775755953973776e-07, "loss": 0.5265, "step": 915 }, { "epoch": 0.12255820176612256, "grad_norm": 38.86851119995117, "learning_rate": 8.774417982338774e-07, "loss": 0.6138, "step": 916 }, { "epoch": 0.1226919989296227, "grad_norm": 18.43198013305664, "learning_rate": 8.773080010703773e-07, "loss": 0.4701, "step": 917 }, { "epoch": 0.12282579609312283, "grad_norm": 20.04564094543457, "learning_rate": 8.771742039068772e-07, "loss": 0.4875, "step": 918 }, { "epoch": 0.12295959325662296, "grad_norm": 32.59459686279297, "learning_rate": 8.770404067433769e-07, "loss": 0.601, "step": 919 }, { "epoch": 0.1230933904201231, "grad_norm": 10.82992935180664, "learning_rate": 8.769066095798768e-07, "loss": 0.5747, "step": 920 }, { "epoch": 0.12322718758362322, "grad_norm": 6.202564716339111, "learning_rate": 8.767728124163767e-07, "loss": 0.5061, "step": 921 }, { "epoch": 0.12336098474712336, "grad_norm": 27.505483627319336, "learning_rate": 8.766390152528767e-07, "loss": 0.4967, "step": 922 }, { "epoch": 0.12349478191062349, "grad_norm": 21.481840133666992, "learning_rate": 8.765052180893765e-07, "loss": 0.514, "step": 923 }, { "epoch": 0.12362857907412363, "grad_norm": 7.3607635498046875, "learning_rate": 8.763714209258763e-07, "loss": 0.6012, "step": 924 }, { "epoch": 0.12376237623762376, "grad_norm": 8.204723358154297, "learning_rate": 8.762376237623762e-07, "loss": 0.5113, "step": 925 }, { "epoch": 0.1238961734011239, "grad_norm": 37.17082977294922, "learning_rate": 8.761038265988761e-07, "loss": 0.4734, "step": 926 }, { "epoch": 0.12402997056462403, "grad_norm": 20.01595115661621, "learning_rate": 8.759700294353759e-07, "loss": 0.4041, "step": 927 }, { "epoch": 0.12416376772812417, "grad_norm": 7.644332408905029, "learning_rate": 8.758362322718757e-07, "loss": 0.4672, "step": 928 }, { "epoch": 0.1242975648916243, "grad_norm": 17.511173248291016, "learning_rate": 8.757024351083757e-07, "loss": 0.3834, "step": 929 }, { "epoch": 0.12443136205512444, "grad_norm": 13.491358757019043, "learning_rate": 8.755686379448756e-07, "loss": 0.5763, "step": 930 }, { "epoch": 0.12456515921862457, "grad_norm": 23.910215377807617, "learning_rate": 8.754348407813754e-07, "loss": 0.2393, "step": 931 }, { "epoch": 0.12469895638212469, "grad_norm": 8.159933090209961, "learning_rate": 8.753010436178753e-07, "loss": 0.4379, "step": 932 }, { "epoch": 0.12483275354562483, "grad_norm": 6.534641265869141, "learning_rate": 8.751672464543751e-07, "loss": 0.4174, "step": 933 }, { "epoch": 0.12496655070912496, "grad_norm": 20.02547836303711, "learning_rate": 8.75033449290875e-07, "loss": 0.4834, "step": 934 }, { "epoch": 0.1251003478726251, "grad_norm": 21.58654022216797, "learning_rate": 8.748996521273748e-07, "loss": 0.5616, "step": 935 }, { "epoch": 0.12523414503612523, "grad_norm": 27.923477172851562, "learning_rate": 8.747658549638747e-07, "loss": 0.6093, "step": 936 }, { "epoch": 0.12536794219962538, "grad_norm": 5.326559543609619, "learning_rate": 8.746320578003747e-07, "loss": 0.4257, "step": 937 }, { "epoch": 0.1255017393631255, "grad_norm": 31.174009323120117, "learning_rate": 8.744982606368745e-07, "loss": 0.6833, "step": 938 }, { "epoch": 0.12563553652662562, "grad_norm": 45.09873962402344, "learning_rate": 8.743644634733743e-07, "loss": 0.7042, "step": 939 }, { "epoch": 0.12576933369012577, "grad_norm": 13.858295440673828, "learning_rate": 8.742306663098742e-07, "loss": 0.2809, "step": 940 }, { "epoch": 0.1259031308536259, "grad_norm": 32.821990966796875, "learning_rate": 8.740968691463741e-07, "loss": 0.6574, "step": 941 }, { "epoch": 0.12603692801712604, "grad_norm": 14.51640510559082, "learning_rate": 8.739630719828738e-07, "loss": 0.3728, "step": 942 }, { "epoch": 0.12617072518062616, "grad_norm": 7.671949863433838, "learning_rate": 8.738292748193737e-07, "loss": 0.3399, "step": 943 }, { "epoch": 0.1263045223441263, "grad_norm": 18.952070236206055, "learning_rate": 8.736954776558737e-07, "loss": 0.3406, "step": 944 }, { "epoch": 0.12643831950762643, "grad_norm": 39.82175827026367, "learning_rate": 8.735616804923736e-07, "loss": 0.7649, "step": 945 }, { "epoch": 0.12657211667112658, "grad_norm": 18.147743225097656, "learning_rate": 8.734278833288734e-07, "loss": 0.4379, "step": 946 }, { "epoch": 0.1267059138346267, "grad_norm": 8.110132217407227, "learning_rate": 8.732940861653732e-07, "loss": 0.5138, "step": 947 }, { "epoch": 0.12683971099812685, "grad_norm": 10.797733306884766, "learning_rate": 8.731602890018731e-07, "loss": 0.5044, "step": 948 }, { "epoch": 0.12697350816162697, "grad_norm": 13.692362785339355, "learning_rate": 8.73026491838373e-07, "loss": 0.3928, "step": 949 }, { "epoch": 0.1271073053251271, "grad_norm": 20.34851837158203, "learning_rate": 8.728926946748728e-07, "loss": 0.3595, "step": 950 }, { "epoch": 0.12724110248862724, "grad_norm": 15.756446838378906, "learning_rate": 8.727588975113728e-07, "loss": 0.4623, "step": 951 }, { "epoch": 0.12737489965212737, "grad_norm": 13.65123462677002, "learning_rate": 8.726251003478726e-07, "loss": 0.6148, "step": 952 }, { "epoch": 0.12750869681562751, "grad_norm": 8.952404975891113, "learning_rate": 8.724913031843725e-07, "loss": 0.4314, "step": 953 }, { "epoch": 0.12764249397912764, "grad_norm": 13.11648941040039, "learning_rate": 8.723575060208723e-07, "loss": 0.5683, "step": 954 }, { "epoch": 0.12777629114262778, "grad_norm": 26.81349754333496, "learning_rate": 8.722237088573722e-07, "loss": 0.4468, "step": 955 }, { "epoch": 0.1279100883061279, "grad_norm": 26.578075408935547, "learning_rate": 8.72089911693872e-07, "loss": 0.6613, "step": 956 }, { "epoch": 0.12804388546962805, "grad_norm": 21.074352264404297, "learning_rate": 8.719561145303719e-07, "loss": 0.3558, "step": 957 }, { "epoch": 0.12817768263312818, "grad_norm": 24.91514778137207, "learning_rate": 8.718223173668717e-07, "loss": 0.2998, "step": 958 }, { "epoch": 0.12831147979662832, "grad_norm": 18.64313316345215, "learning_rate": 8.716885202033717e-07, "loss": 0.4124, "step": 959 }, { "epoch": 0.12844527696012845, "grad_norm": 23.609424591064453, "learning_rate": 8.715547230398716e-07, "loss": 0.5657, "step": 960 }, { "epoch": 0.12857907412362857, "grad_norm": 17.0042667388916, "learning_rate": 8.714209258763714e-07, "loss": 0.6271, "step": 961 }, { "epoch": 0.12871287128712872, "grad_norm": 13.326577186584473, "learning_rate": 8.712871287128712e-07, "loss": 0.6858, "step": 962 }, { "epoch": 0.12884666845062884, "grad_norm": 23.018367767333984, "learning_rate": 8.711533315493711e-07, "loss": 0.58, "step": 963 }, { "epoch": 0.128980465614129, "grad_norm": 32.63157272338867, "learning_rate": 8.71019534385871e-07, "loss": 0.5257, "step": 964 }, { "epoch": 0.1291142627776291, "grad_norm": 15.328292846679688, "learning_rate": 8.708857372223707e-07, "loss": 0.5313, "step": 965 }, { "epoch": 0.12924805994112926, "grad_norm": 37.222434997558594, "learning_rate": 8.707519400588707e-07, "loss": 0.6995, "step": 966 }, { "epoch": 0.12938185710462938, "grad_norm": 9.546621322631836, "learning_rate": 8.706181428953706e-07, "loss": 0.4393, "step": 967 }, { "epoch": 0.12951565426812953, "grad_norm": 16.545974731445312, "learning_rate": 8.704843457318705e-07, "loss": 0.4895, "step": 968 }, { "epoch": 0.12964945143162965, "grad_norm": 24.878074645996094, "learning_rate": 8.703505485683703e-07, "loss": 0.5066, "step": 969 }, { "epoch": 0.1297832485951298, "grad_norm": 42.40262985229492, "learning_rate": 8.702167514048701e-07, "loss": 0.3309, "step": 970 }, { "epoch": 0.12991704575862992, "grad_norm": 28.841445922851562, "learning_rate": 8.7008295424137e-07, "loss": 0.5612, "step": 971 }, { "epoch": 0.13005084292213004, "grad_norm": 8.162747383117676, "learning_rate": 8.699491570778699e-07, "loss": 0.5901, "step": 972 }, { "epoch": 0.1301846400856302, "grad_norm": 8.998857498168945, "learning_rate": 8.698153599143698e-07, "loss": 0.5005, "step": 973 }, { "epoch": 0.1303184372491303, "grad_norm": 11.464810371398926, "learning_rate": 8.696815627508697e-07, "loss": 0.5488, "step": 974 }, { "epoch": 0.13045223441263046, "grad_norm": 24.8675537109375, "learning_rate": 8.695477655873695e-07, "loss": 0.2476, "step": 975 }, { "epoch": 0.13058603157613058, "grad_norm": 9.991185188293457, "learning_rate": 8.694139684238694e-07, "loss": 0.3007, "step": 976 }, { "epoch": 0.13071982873963073, "grad_norm": 18.29839515686035, "learning_rate": 8.692801712603692e-07, "loss": 0.6639, "step": 977 }, { "epoch": 0.13085362590313085, "grad_norm": 15.95060920715332, "learning_rate": 8.691463740968691e-07, "loss": 0.2412, "step": 978 }, { "epoch": 0.130987423066631, "grad_norm": 14.732762336730957, "learning_rate": 8.69012576933369e-07, "loss": 0.3559, "step": 979 }, { "epoch": 0.13112122023013112, "grad_norm": 11.40162467956543, "learning_rate": 8.688787797698688e-07, "loss": 0.5564, "step": 980 }, { "epoch": 0.13125501739363127, "grad_norm": 29.293657302856445, "learning_rate": 8.687449826063687e-07, "loss": 0.5487, "step": 981 }, { "epoch": 0.1313888145571314, "grad_norm": 14.160679817199707, "learning_rate": 8.686111854428686e-07, "loss": 0.3896, "step": 982 }, { "epoch": 0.1315226117206315, "grad_norm": 16.967578887939453, "learning_rate": 8.684773882793685e-07, "loss": 0.3649, "step": 983 }, { "epoch": 0.13165640888413166, "grad_norm": 9.095792770385742, "learning_rate": 8.683435911158683e-07, "loss": 0.3806, "step": 984 }, { "epoch": 0.13179020604763178, "grad_norm": 40.64033126831055, "learning_rate": 8.682097939523681e-07, "loss": 0.6956, "step": 985 }, { "epoch": 0.13192400321113193, "grad_norm": 12.951752662658691, "learning_rate": 8.68075996788868e-07, "loss": 0.2025, "step": 986 }, { "epoch": 0.13205780037463205, "grad_norm": 26.58283042907715, "learning_rate": 8.679421996253679e-07, "loss": 0.4107, "step": 987 }, { "epoch": 0.1321915975381322, "grad_norm": 53.63166427612305, "learning_rate": 8.678084024618679e-07, "loss": 0.6254, "step": 988 }, { "epoch": 0.13232539470163232, "grad_norm": 35.711971282958984, "learning_rate": 8.676746052983676e-07, "loss": 0.3682, "step": 989 }, { "epoch": 0.13245919186513247, "grad_norm": 46.51295852661133, "learning_rate": 8.675408081348675e-07, "loss": 0.824, "step": 990 }, { "epoch": 0.1325929890286326, "grad_norm": 36.29909133911133, "learning_rate": 8.674070109713674e-07, "loss": 0.5857, "step": 991 }, { "epoch": 0.13272678619213274, "grad_norm": 28.761661529541016, "learning_rate": 8.672732138078673e-07, "loss": 0.4686, "step": 992 }, { "epoch": 0.13286058335563286, "grad_norm": 12.247928619384766, "learning_rate": 8.67139416644367e-07, "loss": 0.4593, "step": 993 }, { "epoch": 0.13299438051913298, "grad_norm": 12.820100784301758, "learning_rate": 8.670056194808669e-07, "loss": 0.3118, "step": 994 }, { "epoch": 0.13312817768263313, "grad_norm": 41.289424896240234, "learning_rate": 8.668718223173669e-07, "loss": 0.6129, "step": 995 }, { "epoch": 0.13326197484613325, "grad_norm": 18.10667610168457, "learning_rate": 8.667380251538667e-07, "loss": 0.4657, "step": 996 }, { "epoch": 0.1333957720096334, "grad_norm": 12.923798561096191, "learning_rate": 8.666042279903666e-07, "loss": 0.4646, "step": 997 }, { "epoch": 0.13352956917313352, "grad_norm": 7.705321788787842, "learning_rate": 8.664704308268664e-07, "loss": 0.4303, "step": 998 }, { "epoch": 0.13366336633663367, "grad_norm": 16.190807342529297, "learning_rate": 8.663366336633663e-07, "loss": 0.438, "step": 999 }, { "epoch": 0.1337971635001338, "grad_norm": 10.062698364257812, "learning_rate": 8.662028364998661e-07, "loss": 0.4077, "step": 1000 }, { "epoch": 0.13393096066363394, "grad_norm": 9.87159252166748, "learning_rate": 8.66069039336366e-07, "loss": 0.3539, "step": 1001 }, { "epoch": 0.13406475782713406, "grad_norm": 36.17155456542969, "learning_rate": 8.659352421728659e-07, "loss": 0.4959, "step": 1002 }, { "epoch": 0.1341985549906342, "grad_norm": 19.14286231994629, "learning_rate": 8.658014450093658e-07, "loss": 0.3906, "step": 1003 }, { "epoch": 0.13433235215413433, "grad_norm": 12.647249221801758, "learning_rate": 8.656676478458656e-07, "loss": 0.4478, "step": 1004 }, { "epoch": 0.13446614931763445, "grad_norm": 30.326763153076172, "learning_rate": 8.655338506823655e-07, "loss": 0.6318, "step": 1005 }, { "epoch": 0.1345999464811346, "grad_norm": 19.530895233154297, "learning_rate": 8.654000535188654e-07, "loss": 0.3343, "step": 1006 }, { "epoch": 0.13473374364463472, "grad_norm": 19.51346206665039, "learning_rate": 8.652662563553653e-07, "loss": 0.4301, "step": 1007 }, { "epoch": 0.13486754080813487, "grad_norm": 15.03439998626709, "learning_rate": 8.65132459191865e-07, "loss": 0.2735, "step": 1008 }, { "epoch": 0.135001337971635, "grad_norm": 41.0278205871582, "learning_rate": 8.649986620283649e-07, "loss": 0.6942, "step": 1009 }, { "epoch": 0.13513513513513514, "grad_norm": 39.6147575378418, "learning_rate": 8.648648648648649e-07, "loss": 0.4853, "step": 1010 }, { "epoch": 0.13526893229863526, "grad_norm": 30.77920913696289, "learning_rate": 8.647310677013648e-07, "loss": 0.6136, "step": 1011 }, { "epoch": 0.1354027294621354, "grad_norm": 19.217119216918945, "learning_rate": 8.645972705378645e-07, "loss": 0.3891, "step": 1012 }, { "epoch": 0.13553652662563553, "grad_norm": 25.110679626464844, "learning_rate": 8.644634733743644e-07, "loss": 0.5742, "step": 1013 }, { "epoch": 0.13567032378913568, "grad_norm": 14.407952308654785, "learning_rate": 8.643296762108643e-07, "loss": 0.5115, "step": 1014 }, { "epoch": 0.1358041209526358, "grad_norm": 21.575336456298828, "learning_rate": 8.641958790473642e-07, "loss": 0.4406, "step": 1015 }, { "epoch": 0.13593791811613593, "grad_norm": 33.25710678100586, "learning_rate": 8.64062081883864e-07, "loss": 0.3782, "step": 1016 }, { "epoch": 0.13607171527963607, "grad_norm": 38.40221405029297, "learning_rate": 8.639282847203639e-07, "loss": 0.3316, "step": 1017 }, { "epoch": 0.1362055124431362, "grad_norm": 25.04479217529297, "learning_rate": 8.637944875568638e-07, "loss": 0.4061, "step": 1018 }, { "epoch": 0.13633930960663634, "grad_norm": 13.173995018005371, "learning_rate": 8.636606903933637e-07, "loss": 0.5242, "step": 1019 }, { "epoch": 0.13647310677013647, "grad_norm": 20.351009368896484, "learning_rate": 8.635268932298635e-07, "loss": 0.3147, "step": 1020 }, { "epoch": 0.13660690393363661, "grad_norm": 23.574689865112305, "learning_rate": 8.633930960663633e-07, "loss": 0.6033, "step": 1021 }, { "epoch": 0.13674070109713674, "grad_norm": 31.218692779541016, "learning_rate": 8.632592989028632e-07, "loss": 0.7926, "step": 1022 }, { "epoch": 0.13687449826063688, "grad_norm": 10.046794891357422, "learning_rate": 8.63125501739363e-07, "loss": 0.5281, "step": 1023 }, { "epoch": 0.137008295424137, "grad_norm": 21.328908920288086, "learning_rate": 8.629917045758629e-07, "loss": 0.398, "step": 1024 }, { "epoch": 0.13714209258763715, "grad_norm": 10.779525756835938, "learning_rate": 8.628579074123629e-07, "loss": 0.4469, "step": 1025 }, { "epoch": 0.13727588975113728, "grad_norm": 25.661333084106445, "learning_rate": 8.627241102488627e-07, "loss": 0.5411, "step": 1026 }, { "epoch": 0.1374096869146374, "grad_norm": 15.589780807495117, "learning_rate": 8.625903130853625e-07, "loss": 0.4403, "step": 1027 }, { "epoch": 0.13754348407813755, "grad_norm": 17.590768814086914, "learning_rate": 8.624565159218624e-07, "loss": 0.5327, "step": 1028 }, { "epoch": 0.13767728124163767, "grad_norm": 21.45888900756836, "learning_rate": 8.623227187583623e-07, "loss": 0.5604, "step": 1029 }, { "epoch": 0.13781107840513782, "grad_norm": 13.883489608764648, "learning_rate": 8.621889215948622e-07, "loss": 0.4668, "step": 1030 }, { "epoch": 0.13794487556863794, "grad_norm": 12.01395034790039, "learning_rate": 8.620551244313619e-07, "loss": 0.5545, "step": 1031 }, { "epoch": 0.13807867273213809, "grad_norm": 13.00326919555664, "learning_rate": 8.619213272678619e-07, "loss": 0.3678, "step": 1032 }, { "epoch": 0.1382124698956382, "grad_norm": 9.986605644226074, "learning_rate": 8.617875301043618e-07, "loss": 0.503, "step": 1033 }, { "epoch": 0.13834626705913836, "grad_norm": 8.482954025268555, "learning_rate": 8.616537329408617e-07, "loss": 0.5175, "step": 1034 }, { "epoch": 0.13848006422263848, "grad_norm": 22.19717788696289, "learning_rate": 8.615199357773614e-07, "loss": 0.5637, "step": 1035 }, { "epoch": 0.13861386138613863, "grad_norm": 22.682371139526367, "learning_rate": 8.613861386138613e-07, "loss": 0.4559, "step": 1036 }, { "epoch": 0.13874765854963875, "grad_norm": 13.869306564331055, "learning_rate": 8.612523414503612e-07, "loss": 0.3834, "step": 1037 }, { "epoch": 0.13888145571313887, "grad_norm": 22.56285858154297, "learning_rate": 8.611185442868611e-07, "loss": 0.3933, "step": 1038 }, { "epoch": 0.13901525287663902, "grad_norm": 6.129053115844727, "learning_rate": 8.60984747123361e-07, "loss": 0.428, "step": 1039 }, { "epoch": 0.13914905004013914, "grad_norm": 25.079265594482422, "learning_rate": 8.608509499598608e-07, "loss": 0.5321, "step": 1040 }, { "epoch": 0.1392828472036393, "grad_norm": 7.910948753356934, "learning_rate": 8.607171527963607e-07, "loss": 0.4309, "step": 1041 }, { "epoch": 0.1394166443671394, "grad_norm": 14.826630592346191, "learning_rate": 8.605833556328606e-07, "loss": 0.3342, "step": 1042 }, { "epoch": 0.13955044153063956, "grad_norm": 8.682602882385254, "learning_rate": 8.604495584693604e-07, "loss": 0.3902, "step": 1043 }, { "epoch": 0.13968423869413968, "grad_norm": 28.868576049804688, "learning_rate": 8.603157613058603e-07, "loss": 0.5022, "step": 1044 }, { "epoch": 0.13981803585763983, "grad_norm": 14.435909271240234, "learning_rate": 8.601819641423601e-07, "loss": 0.5051, "step": 1045 }, { "epoch": 0.13995183302113995, "grad_norm": 42.43812942504883, "learning_rate": 8.600481669788599e-07, "loss": 0.7036, "step": 1046 }, { "epoch": 0.1400856301846401, "grad_norm": 13.412034034729004, "learning_rate": 8.599143698153599e-07, "loss": 0.3672, "step": 1047 }, { "epoch": 0.14021942734814022, "grad_norm": 9.961472511291504, "learning_rate": 8.597805726518598e-07, "loss": 0.4749, "step": 1048 }, { "epoch": 0.14035322451164034, "grad_norm": 10.667329788208008, "learning_rate": 8.596467754883597e-07, "loss": 0.3752, "step": 1049 }, { "epoch": 0.1404870216751405, "grad_norm": 14.059369087219238, "learning_rate": 8.595129783248594e-07, "loss": 0.6167, "step": 1050 }, { "epoch": 0.1406208188386406, "grad_norm": 10.44986629486084, "learning_rate": 8.593791811613593e-07, "loss": 0.4422, "step": 1051 }, { "epoch": 0.14075461600214076, "grad_norm": 33.0904655456543, "learning_rate": 8.592453839978592e-07, "loss": 0.3152, "step": 1052 }, { "epoch": 0.14088841316564088, "grad_norm": 20.93098258972168, "learning_rate": 8.591115868343591e-07, "loss": 0.4182, "step": 1053 }, { "epoch": 0.14102221032914103, "grad_norm": 17.567718505859375, "learning_rate": 8.589777896708589e-07, "loss": 0.3796, "step": 1054 }, { "epoch": 0.14115600749264115, "grad_norm": 33.02024841308594, "learning_rate": 8.588439925073588e-07, "loss": 0.7024, "step": 1055 }, { "epoch": 0.1412898046561413, "grad_norm": 20.23255729675293, "learning_rate": 8.587101953438587e-07, "loss": 0.6086, "step": 1056 }, { "epoch": 0.14142360181964142, "grad_norm": 25.354907989501953, "learning_rate": 8.585763981803586e-07, "loss": 0.4656, "step": 1057 }, { "epoch": 0.14155739898314157, "grad_norm": 10.155088424682617, "learning_rate": 8.584426010168584e-07, "loss": 0.3588, "step": 1058 }, { "epoch": 0.1416911961466417, "grad_norm": 15.883217811584473, "learning_rate": 8.583088038533582e-07, "loss": 0.5351, "step": 1059 }, { "epoch": 0.1418249933101418, "grad_norm": 29.684785842895508, "learning_rate": 8.581750066898581e-07, "loss": 0.3729, "step": 1060 }, { "epoch": 0.14195879047364196, "grad_norm": 13.419677734375, "learning_rate": 8.580412095263581e-07, "loss": 0.4137, "step": 1061 }, { "epoch": 0.14209258763714208, "grad_norm": 11.774664878845215, "learning_rate": 8.579074123628579e-07, "loss": 0.5146, "step": 1062 }, { "epoch": 0.14222638480064223, "grad_norm": 16.35268211364746, "learning_rate": 8.577736151993577e-07, "loss": 0.5844, "step": 1063 }, { "epoch": 0.14236018196414235, "grad_norm": 14.742165565490723, "learning_rate": 8.576398180358576e-07, "loss": 0.5796, "step": 1064 }, { "epoch": 0.1424939791276425, "grad_norm": 36.074371337890625, "learning_rate": 8.575060208723575e-07, "loss": 0.3556, "step": 1065 }, { "epoch": 0.14262777629114262, "grad_norm": 11.04403018951416, "learning_rate": 8.573722237088573e-07, "loss": 0.5094, "step": 1066 }, { "epoch": 0.14276157345464277, "grad_norm": 13.303543090820312, "learning_rate": 8.572384265453572e-07, "loss": 0.4431, "step": 1067 }, { "epoch": 0.1428953706181429, "grad_norm": 10.003376007080078, "learning_rate": 8.57104629381857e-07, "loss": 0.4621, "step": 1068 }, { "epoch": 0.14302916778164304, "grad_norm": 13.355795860290527, "learning_rate": 8.56970832218357e-07, "loss": 0.4834, "step": 1069 }, { "epoch": 0.14316296494514316, "grad_norm": 16.464248657226562, "learning_rate": 8.568370350548568e-07, "loss": 0.7035, "step": 1070 }, { "epoch": 0.14329676210864328, "grad_norm": 14.790122032165527, "learning_rate": 8.567032378913567e-07, "loss": 0.4619, "step": 1071 }, { "epoch": 0.14343055927214343, "grad_norm": 44.2668342590332, "learning_rate": 8.565694407278566e-07, "loss": 0.6247, "step": 1072 }, { "epoch": 0.14356435643564355, "grad_norm": 19.65631675720215, "learning_rate": 8.564356435643563e-07, "loss": 0.6015, "step": 1073 }, { "epoch": 0.1436981535991437, "grad_norm": 14.067594528198242, "learning_rate": 8.563018464008562e-07, "loss": 0.4161, "step": 1074 }, { "epoch": 0.14383195076264382, "grad_norm": 17.294401168823242, "learning_rate": 8.561680492373561e-07, "loss": 0.408, "step": 1075 }, { "epoch": 0.14396574792614397, "grad_norm": 18.10170555114746, "learning_rate": 8.560342520738561e-07, "loss": 0.4568, "step": 1076 }, { "epoch": 0.1440995450896441, "grad_norm": 33.9039306640625, "learning_rate": 8.559004549103558e-07, "loss": 0.412, "step": 1077 }, { "epoch": 0.14423334225314424, "grad_norm": 36.35660171508789, "learning_rate": 8.557666577468557e-07, "loss": 0.3614, "step": 1078 }, { "epoch": 0.14436713941664436, "grad_norm": 51.194698333740234, "learning_rate": 8.556328605833556e-07, "loss": 0.3855, "step": 1079 }, { "epoch": 0.1445009365801445, "grad_norm": 38.9460563659668, "learning_rate": 8.554990634198555e-07, "loss": 0.4795, "step": 1080 }, { "epoch": 0.14463473374364463, "grad_norm": 12.75380802154541, "learning_rate": 8.553652662563553e-07, "loss": 0.5072, "step": 1081 }, { "epoch": 0.14476853090714478, "grad_norm": 10.10112476348877, "learning_rate": 8.552314690928551e-07, "loss": 0.3576, "step": 1082 }, { "epoch": 0.1449023280706449, "grad_norm": 9.3092041015625, "learning_rate": 8.550976719293551e-07, "loss": 0.4334, "step": 1083 }, { "epoch": 0.14503612523414502, "grad_norm": 18.394838333129883, "learning_rate": 8.54963874765855e-07, "loss": 0.4417, "step": 1084 }, { "epoch": 0.14516992239764517, "grad_norm": 18.20465660095215, "learning_rate": 8.548300776023548e-07, "loss": 0.5707, "step": 1085 }, { "epoch": 0.1453037195611453, "grad_norm": 25.762022018432617, "learning_rate": 8.546962804388547e-07, "loss": 0.5788, "step": 1086 }, { "epoch": 0.14543751672464544, "grad_norm": 14.773137092590332, "learning_rate": 8.545624832753545e-07, "loss": 0.5039, "step": 1087 }, { "epoch": 0.14557131388814556, "grad_norm": 28.635982513427734, "learning_rate": 8.544286861118544e-07, "loss": 0.5883, "step": 1088 }, { "epoch": 0.1457051110516457, "grad_norm": 7.165453910827637, "learning_rate": 8.542948889483542e-07, "loss": 0.3777, "step": 1089 }, { "epoch": 0.14583890821514583, "grad_norm": 24.072002410888672, "learning_rate": 8.541610917848541e-07, "loss": 0.2719, "step": 1090 }, { "epoch": 0.14597270537864598, "grad_norm": 15.353316307067871, "learning_rate": 8.54027294621354e-07, "loss": 0.5656, "step": 1091 }, { "epoch": 0.1461065025421461, "grad_norm": 8.478251457214355, "learning_rate": 8.538934974578539e-07, "loss": 0.4995, "step": 1092 }, { "epoch": 0.14624029970564625, "grad_norm": 19.584705352783203, "learning_rate": 8.537597002943537e-07, "loss": 0.4052, "step": 1093 }, { "epoch": 0.14637409686914638, "grad_norm": 33.23139953613281, "learning_rate": 8.536259031308536e-07, "loss": 0.6209, "step": 1094 }, { "epoch": 0.1465078940326465, "grad_norm": 10.714253425598145, "learning_rate": 8.534921059673535e-07, "loss": 0.5793, "step": 1095 }, { "epoch": 0.14664169119614665, "grad_norm": 12.912341117858887, "learning_rate": 8.533583088038533e-07, "loss": 0.4313, "step": 1096 }, { "epoch": 0.14677548835964677, "grad_norm": 19.034208297729492, "learning_rate": 8.532245116403531e-07, "loss": 0.3827, "step": 1097 }, { "epoch": 0.14690928552314692, "grad_norm": 7.383565425872803, "learning_rate": 8.530907144768531e-07, "loss": 0.3985, "step": 1098 }, { "epoch": 0.14704308268664704, "grad_norm": 26.097747802734375, "learning_rate": 8.52956917313353e-07, "loss": 0.5697, "step": 1099 }, { "epoch": 0.14717687985014719, "grad_norm": 17.86811065673828, "learning_rate": 8.528231201498528e-07, "loss": 0.5018, "step": 1100 }, { "epoch": 0.1473106770136473, "grad_norm": 22.718515396118164, "learning_rate": 8.526893229863526e-07, "loss": 0.5236, "step": 1101 }, { "epoch": 0.14744447417714746, "grad_norm": 17.963829040527344, "learning_rate": 8.525555258228525e-07, "loss": 0.5027, "step": 1102 }, { "epoch": 0.14757827134064758, "grad_norm": 34.602684020996094, "learning_rate": 8.524217286593524e-07, "loss": 0.6151, "step": 1103 }, { "epoch": 0.14771206850414773, "grad_norm": 34.643470764160156, "learning_rate": 8.522879314958522e-07, "loss": 0.4101, "step": 1104 }, { "epoch": 0.14784586566764785, "grad_norm": 13.732346534729004, "learning_rate": 8.521541343323521e-07, "loss": 0.5326, "step": 1105 }, { "epoch": 0.14797966283114797, "grad_norm": 15.583259582519531, "learning_rate": 8.52020337168852e-07, "loss": 0.5521, "step": 1106 }, { "epoch": 0.14811345999464812, "grad_norm": 30.20046043395996, "learning_rate": 8.518865400053519e-07, "loss": 0.5299, "step": 1107 }, { "epoch": 0.14824725715814824, "grad_norm": 7.903931617736816, "learning_rate": 8.517527428418517e-07, "loss": 0.522, "step": 1108 }, { "epoch": 0.1483810543216484, "grad_norm": 23.922889709472656, "learning_rate": 8.516189456783516e-07, "loss": 0.3543, "step": 1109 }, { "epoch": 0.1485148514851485, "grad_norm": 12.926416397094727, "learning_rate": 8.514851485148514e-07, "loss": 0.509, "step": 1110 }, { "epoch": 0.14864864864864866, "grad_norm": 22.118497848510742, "learning_rate": 8.513513513513513e-07, "loss": 0.5498, "step": 1111 }, { "epoch": 0.14878244581214878, "grad_norm": 15.124397277832031, "learning_rate": 8.512175541878511e-07, "loss": 0.5322, "step": 1112 }, { "epoch": 0.14891624297564893, "grad_norm": 31.86571502685547, "learning_rate": 8.510837570243511e-07, "loss": 0.8267, "step": 1113 }, { "epoch": 0.14905004013914905, "grad_norm": 16.617712020874023, "learning_rate": 8.50949959860851e-07, "loss": 0.4812, "step": 1114 }, { "epoch": 0.1491838373026492, "grad_norm": 26.549129486083984, "learning_rate": 8.508161626973508e-07, "loss": 0.5245, "step": 1115 }, { "epoch": 0.14931763446614932, "grad_norm": 26.623355865478516, "learning_rate": 8.506823655338506e-07, "loss": 0.4139, "step": 1116 }, { "epoch": 0.14945143162964944, "grad_norm": 10.133277893066406, "learning_rate": 8.505485683703505e-07, "loss": 0.4911, "step": 1117 }, { "epoch": 0.1495852287931496, "grad_norm": 8.55968952178955, "learning_rate": 8.504147712068504e-07, "loss": 0.5022, "step": 1118 }, { "epoch": 0.1497190259566497, "grad_norm": 14.6524076461792, "learning_rate": 8.502809740433502e-07, "loss": 0.3357, "step": 1119 }, { "epoch": 0.14985282312014986, "grad_norm": 36.36076736450195, "learning_rate": 8.501471768798501e-07, "loss": 0.5626, "step": 1120 }, { "epoch": 0.14998662028364998, "grad_norm": 7.923619270324707, "learning_rate": 8.5001337971635e-07, "loss": 0.3469, "step": 1121 }, { "epoch": 0.15012041744715013, "grad_norm": 51.634429931640625, "learning_rate": 8.498795825528499e-07, "loss": 0.6748, "step": 1122 }, { "epoch": 0.15025421461065025, "grad_norm": 20.671955108642578, "learning_rate": 8.497457853893498e-07, "loss": 0.4114, "step": 1123 }, { "epoch": 0.1503880117741504, "grad_norm": 6.826531410217285, "learning_rate": 8.496119882258495e-07, "loss": 0.3577, "step": 1124 }, { "epoch": 0.15052180893765052, "grad_norm": 36.41133117675781, "learning_rate": 8.494781910623494e-07, "loss": 0.252, "step": 1125 }, { "epoch": 0.15065560610115067, "grad_norm": 7.488653659820557, "learning_rate": 8.493443938988493e-07, "loss": 0.4022, "step": 1126 }, { "epoch": 0.1507894032646508, "grad_norm": 28.698902130126953, "learning_rate": 8.492105967353492e-07, "loss": 0.2884, "step": 1127 }, { "epoch": 0.1509232004281509, "grad_norm": 7.585355758666992, "learning_rate": 8.490767995718491e-07, "loss": 0.3929, "step": 1128 }, { "epoch": 0.15105699759165106, "grad_norm": 55.315040588378906, "learning_rate": 8.489430024083489e-07, "loss": 0.8065, "step": 1129 }, { "epoch": 0.15119079475515118, "grad_norm": 33.136932373046875, "learning_rate": 8.488092052448488e-07, "loss": 0.6115, "step": 1130 }, { "epoch": 0.15132459191865133, "grad_norm": 31.939626693725586, "learning_rate": 8.486754080813486e-07, "loss": 0.5588, "step": 1131 }, { "epoch": 0.15145838908215145, "grad_norm": 16.03652000427246, "learning_rate": 8.485416109178485e-07, "loss": 0.3537, "step": 1132 }, { "epoch": 0.1515921862456516, "grad_norm": 35.589176177978516, "learning_rate": 8.484078137543483e-07, "loss": 0.479, "step": 1133 }, { "epoch": 0.15172598340915172, "grad_norm": 14.861717224121094, "learning_rate": 8.482740165908482e-07, "loss": 0.3361, "step": 1134 }, { "epoch": 0.15185978057265187, "grad_norm": 10.975845336914062, "learning_rate": 8.481402194273481e-07, "loss": 0.259, "step": 1135 }, { "epoch": 0.151993577736152, "grad_norm": 20.433433532714844, "learning_rate": 8.48006422263848e-07, "loss": 0.5122, "step": 1136 }, { "epoch": 0.15212737489965214, "grad_norm": 9.75679874420166, "learning_rate": 8.478726251003479e-07, "loss": 0.3721, "step": 1137 }, { "epoch": 0.15226117206315226, "grad_norm": 19.282169342041016, "learning_rate": 8.477388279368477e-07, "loss": 0.4323, "step": 1138 }, { "epoch": 0.15239496922665238, "grad_norm": 11.527981758117676, "learning_rate": 8.476050307733475e-07, "loss": 0.4772, "step": 1139 }, { "epoch": 0.15252876639015253, "grad_norm": 46.93404006958008, "learning_rate": 8.474712336098474e-07, "loss": 0.4195, "step": 1140 }, { "epoch": 0.15266256355365265, "grad_norm": 13.019043922424316, "learning_rate": 8.473374364463473e-07, "loss": 0.463, "step": 1141 }, { "epoch": 0.1527963607171528, "grad_norm": 12.439932823181152, "learning_rate": 8.472036392828473e-07, "loss": 0.5495, "step": 1142 }, { "epoch": 0.15293015788065292, "grad_norm": 28.69991683959961, "learning_rate": 8.47069842119347e-07, "loss": 0.4724, "step": 1143 }, { "epoch": 0.15306395504415307, "grad_norm": 33.052520751953125, "learning_rate": 8.469360449558469e-07, "loss": 0.3651, "step": 1144 }, { "epoch": 0.1531977522076532, "grad_norm": 14.497082710266113, "learning_rate": 8.468022477923468e-07, "loss": 0.5217, "step": 1145 }, { "epoch": 0.15333154937115334, "grad_norm": 12.106523513793945, "learning_rate": 8.466684506288467e-07, "loss": 0.5834, "step": 1146 }, { "epoch": 0.15346534653465346, "grad_norm": 12.682771682739258, "learning_rate": 8.465346534653464e-07, "loss": 0.4847, "step": 1147 }, { "epoch": 0.1535991436981536, "grad_norm": 23.12197494506836, "learning_rate": 8.464008563018463e-07, "loss": 0.4781, "step": 1148 }, { "epoch": 0.15373294086165373, "grad_norm": 21.340051651000977, "learning_rate": 8.462670591383463e-07, "loss": 0.3978, "step": 1149 }, { "epoch": 0.15386673802515385, "grad_norm": 25.02630043029785, "learning_rate": 8.461332619748462e-07, "loss": 0.4332, "step": 1150 }, { "epoch": 0.154000535188654, "grad_norm": 16.136640548706055, "learning_rate": 8.45999464811346e-07, "loss": 0.5226, "step": 1151 }, { "epoch": 0.15413433235215412, "grad_norm": 26.32077980041504, "learning_rate": 8.458656676478458e-07, "loss": 0.5325, "step": 1152 }, { "epoch": 0.15426812951565427, "grad_norm": 23.55022430419922, "learning_rate": 8.457318704843457e-07, "loss": 0.649, "step": 1153 }, { "epoch": 0.1544019266791544, "grad_norm": 17.521728515625, "learning_rate": 8.455980733208455e-07, "loss": 0.5337, "step": 1154 }, { "epoch": 0.15453572384265454, "grad_norm": 20.450679779052734, "learning_rate": 8.454642761573454e-07, "loss": 0.6387, "step": 1155 }, { "epoch": 0.15466952100615466, "grad_norm": 16.86343002319336, "learning_rate": 8.453304789938453e-07, "loss": 0.5594, "step": 1156 }, { "epoch": 0.1548033181696548, "grad_norm": 13.14422607421875, "learning_rate": 8.451966818303452e-07, "loss": 0.3462, "step": 1157 }, { "epoch": 0.15493711533315493, "grad_norm": 8.165389060974121, "learning_rate": 8.45062884666845e-07, "loss": 0.4194, "step": 1158 }, { "epoch": 0.15507091249665508, "grad_norm": 9.689484596252441, "learning_rate": 8.449290875033449e-07, "loss": 0.6384, "step": 1159 }, { "epoch": 0.1552047096601552, "grad_norm": 8.3341646194458, "learning_rate": 8.447952903398448e-07, "loss": 0.3149, "step": 1160 }, { "epoch": 0.15533850682365533, "grad_norm": 9.563849449157715, "learning_rate": 8.446614931763446e-07, "loss": 0.4857, "step": 1161 }, { "epoch": 0.15547230398715547, "grad_norm": 37.00982666015625, "learning_rate": 8.445276960128444e-07, "loss": 0.5134, "step": 1162 }, { "epoch": 0.1556061011506556, "grad_norm": 7.769920349121094, "learning_rate": 8.443938988493443e-07, "loss": 0.4134, "step": 1163 }, { "epoch": 0.15573989831415574, "grad_norm": 8.5812349319458, "learning_rate": 8.442601016858443e-07, "loss": 0.4967, "step": 1164 }, { "epoch": 0.15587369547765587, "grad_norm": 24.907833099365234, "learning_rate": 8.441263045223442e-07, "loss": 0.3632, "step": 1165 }, { "epoch": 0.15600749264115601, "grad_norm": 7.066406726837158, "learning_rate": 8.439925073588439e-07, "loss": 0.4246, "step": 1166 }, { "epoch": 0.15614128980465614, "grad_norm": 15.487794876098633, "learning_rate": 8.438587101953438e-07, "loss": 0.4532, "step": 1167 }, { "epoch": 0.15627508696815628, "grad_norm": 6.351493835449219, "learning_rate": 8.437249130318437e-07, "loss": 0.4055, "step": 1168 }, { "epoch": 0.1564088841316564, "grad_norm": 16.68856430053711, "learning_rate": 8.435911158683436e-07, "loss": 0.3326, "step": 1169 }, { "epoch": 0.15654268129515655, "grad_norm": 25.924184799194336, "learning_rate": 8.434573187048433e-07, "loss": 0.5644, "step": 1170 }, { "epoch": 0.15667647845865668, "grad_norm": 12.220633506774902, "learning_rate": 8.433235215413432e-07, "loss": 0.4726, "step": 1171 }, { "epoch": 0.1568102756221568, "grad_norm": 19.838268280029297, "learning_rate": 8.431897243778432e-07, "loss": 0.2553, "step": 1172 }, { "epoch": 0.15694407278565695, "grad_norm": 25.37040901184082, "learning_rate": 8.430559272143431e-07, "loss": 0.5913, "step": 1173 }, { "epoch": 0.15707786994915707, "grad_norm": 16.33774757385254, "learning_rate": 8.429221300508429e-07, "loss": 0.553, "step": 1174 }, { "epoch": 0.15721166711265722, "grad_norm": 15.231478691101074, "learning_rate": 8.427883328873427e-07, "loss": 0.286, "step": 1175 }, { "epoch": 0.15734546427615734, "grad_norm": 28.28806495666504, "learning_rate": 8.426545357238426e-07, "loss": 0.3721, "step": 1176 }, { "epoch": 0.1574792614396575, "grad_norm": 30.583473205566406, "learning_rate": 8.425207385603424e-07, "loss": 0.5323, "step": 1177 }, { "epoch": 0.1576130586031576, "grad_norm": 21.03624153137207, "learning_rate": 8.423869413968423e-07, "loss": 0.3899, "step": 1178 }, { "epoch": 0.15774685576665776, "grad_norm": 15.871524810791016, "learning_rate": 8.422531442333423e-07, "loss": 0.6653, "step": 1179 }, { "epoch": 0.15788065293015788, "grad_norm": 15.541633605957031, "learning_rate": 8.421193470698421e-07, "loss": 0.3158, "step": 1180 }, { "epoch": 0.15801445009365803, "grad_norm": 14.04244327545166, "learning_rate": 8.419855499063419e-07, "loss": 0.5809, "step": 1181 }, { "epoch": 0.15814824725715815, "grad_norm": 19.90383529663086, "learning_rate": 8.418517527428418e-07, "loss": 0.4375, "step": 1182 }, { "epoch": 0.15828204442065827, "grad_norm": 37.68873596191406, "learning_rate": 8.417179555793417e-07, "loss": 0.5933, "step": 1183 }, { "epoch": 0.15841584158415842, "grad_norm": 20.392383575439453, "learning_rate": 8.415841584158416e-07, "loss": 0.6522, "step": 1184 }, { "epoch": 0.15854963874765854, "grad_norm": 22.111127853393555, "learning_rate": 8.414503612523413e-07, "loss": 0.397, "step": 1185 }, { "epoch": 0.1586834359111587, "grad_norm": 27.344785690307617, "learning_rate": 8.413165640888413e-07, "loss": 0.5001, "step": 1186 }, { "epoch": 0.1588172330746588, "grad_norm": 9.45525074005127, "learning_rate": 8.411827669253412e-07, "loss": 0.4437, "step": 1187 }, { "epoch": 0.15895103023815896, "grad_norm": 21.442766189575195, "learning_rate": 8.410489697618411e-07, "loss": 0.355, "step": 1188 }, { "epoch": 0.15908482740165908, "grad_norm": 11.071072578430176, "learning_rate": 8.409151725983408e-07, "loss": 0.3007, "step": 1189 }, { "epoch": 0.15921862456515923, "grad_norm": 36.77924728393555, "learning_rate": 8.407813754348407e-07, "loss": 0.5884, "step": 1190 }, { "epoch": 0.15935242172865935, "grad_norm": 49.30533218383789, "learning_rate": 8.406475782713406e-07, "loss": 0.8238, "step": 1191 }, { "epoch": 0.1594862188921595, "grad_norm": 16.872594833374023, "learning_rate": 8.405137811078405e-07, "loss": 0.3115, "step": 1192 }, { "epoch": 0.15962001605565962, "grad_norm": 27.2239933013916, "learning_rate": 8.403799839443403e-07, "loss": 0.6358, "step": 1193 }, { "epoch": 0.15975381321915974, "grad_norm": 10.148818016052246, "learning_rate": 8.402461867808402e-07, "loss": 0.4744, "step": 1194 }, { "epoch": 0.1598876103826599, "grad_norm": 8.998713493347168, "learning_rate": 8.401123896173401e-07, "loss": 0.3465, "step": 1195 }, { "epoch": 0.16002140754616, "grad_norm": 27.172130584716797, "learning_rate": 8.3997859245384e-07, "loss": 0.5086, "step": 1196 }, { "epoch": 0.16015520470966016, "grad_norm": 15.006403923034668, "learning_rate": 8.398447952903398e-07, "loss": 0.5742, "step": 1197 }, { "epoch": 0.16028900187316028, "grad_norm": 9.597895622253418, "learning_rate": 8.397109981268397e-07, "loss": 0.4642, "step": 1198 }, { "epoch": 0.16042279903666043, "grad_norm": 11.5714693069458, "learning_rate": 8.395772009633395e-07, "loss": 0.496, "step": 1199 }, { "epoch": 0.16055659620016055, "grad_norm": 11.514792442321777, "learning_rate": 8.394434037998393e-07, "loss": 0.5023, "step": 1200 }, { "epoch": 0.1606903933636607, "grad_norm": 25.158241271972656, "learning_rate": 8.393096066363393e-07, "loss": 0.6924, "step": 1201 }, { "epoch": 0.16082419052716082, "grad_norm": 18.205537796020508, "learning_rate": 8.391758094728392e-07, "loss": 0.43, "step": 1202 }, { "epoch": 0.16095798769066097, "grad_norm": 25.58487319946289, "learning_rate": 8.39042012309339e-07, "loss": 0.5229, "step": 1203 }, { "epoch": 0.1610917848541611, "grad_norm": 17.977558135986328, "learning_rate": 8.389082151458388e-07, "loss": 0.4516, "step": 1204 }, { "epoch": 0.1612255820176612, "grad_norm": 53.67992401123047, "learning_rate": 8.387744179823387e-07, "loss": 0.4348, "step": 1205 }, { "epoch": 0.16135937918116136, "grad_norm": 39.767189025878906, "learning_rate": 8.386406208188386e-07, "loss": 0.443, "step": 1206 }, { "epoch": 0.16149317634466148, "grad_norm": 18.44748306274414, "learning_rate": 8.385068236553385e-07, "loss": 0.5178, "step": 1207 }, { "epoch": 0.16162697350816163, "grad_norm": 12.527310371398926, "learning_rate": 8.383730264918383e-07, "loss": 0.4699, "step": 1208 }, { "epoch": 0.16176077067166175, "grad_norm": 22.511720657348633, "learning_rate": 8.382392293283382e-07, "loss": 0.5315, "step": 1209 }, { "epoch": 0.1618945678351619, "grad_norm": 10.538710594177246, "learning_rate": 8.381054321648381e-07, "loss": 0.4903, "step": 1210 }, { "epoch": 0.16202836499866202, "grad_norm": 52.944671630859375, "learning_rate": 8.37971635001338e-07, "loss": 0.7606, "step": 1211 }, { "epoch": 0.16216216216216217, "grad_norm": 16.020965576171875, "learning_rate": 8.378378378378377e-07, "loss": 0.3954, "step": 1212 }, { "epoch": 0.1622959593256623, "grad_norm": 8.796537399291992, "learning_rate": 8.377040406743376e-07, "loss": 0.4446, "step": 1213 }, { "epoch": 0.16242975648916244, "grad_norm": 20.03411293029785, "learning_rate": 8.375702435108375e-07, "loss": 0.4719, "step": 1214 }, { "epoch": 0.16256355365266256, "grad_norm": 17.982542037963867, "learning_rate": 8.374364463473374e-07, "loss": 0.4395, "step": 1215 }, { "epoch": 0.16269735081616268, "grad_norm": 22.838237762451172, "learning_rate": 8.373026491838373e-07, "loss": 0.5441, "step": 1216 }, { "epoch": 0.16283114797966283, "grad_norm": 19.075824737548828, "learning_rate": 8.371688520203371e-07, "loss": 0.4745, "step": 1217 }, { "epoch": 0.16296494514316295, "grad_norm": 14.672065734863281, "learning_rate": 8.37035054856837e-07, "loss": 0.364, "step": 1218 }, { "epoch": 0.1630987423066631, "grad_norm": 21.91503143310547, "learning_rate": 8.369012576933369e-07, "loss": 0.5055, "step": 1219 }, { "epoch": 0.16323253947016322, "grad_norm": 9.639630317687988, "learning_rate": 8.367674605298367e-07, "loss": 0.4489, "step": 1220 }, { "epoch": 0.16336633663366337, "grad_norm": 10.625786781311035, "learning_rate": 8.366336633663366e-07, "loss": 0.4738, "step": 1221 }, { "epoch": 0.1635001337971635, "grad_norm": 18.060623168945312, "learning_rate": 8.364998662028364e-07, "loss": 0.5142, "step": 1222 }, { "epoch": 0.16363393096066364, "grad_norm": 22.906064987182617, "learning_rate": 8.363660690393364e-07, "loss": 0.5288, "step": 1223 }, { "epoch": 0.16376772812416376, "grad_norm": 9.946428298950195, "learning_rate": 8.362322718758362e-07, "loss": 0.4846, "step": 1224 }, { "epoch": 0.1639015252876639, "grad_norm": 29.317768096923828, "learning_rate": 8.360984747123361e-07, "loss": 0.3425, "step": 1225 }, { "epoch": 0.16403532245116403, "grad_norm": 22.014577865600586, "learning_rate": 8.35964677548836e-07, "loss": 0.5081, "step": 1226 }, { "epoch": 0.16416911961466418, "grad_norm": 15.025308609008789, "learning_rate": 8.358308803853358e-07, "loss": 0.552, "step": 1227 }, { "epoch": 0.1643029167781643, "grad_norm": 19.02707290649414, "learning_rate": 8.356970832218356e-07, "loss": 0.4463, "step": 1228 }, { "epoch": 0.16443671394166443, "grad_norm": 10.105844497680664, "learning_rate": 8.355632860583355e-07, "loss": 0.4895, "step": 1229 }, { "epoch": 0.16457051110516457, "grad_norm": 7.376011371612549, "learning_rate": 8.354294888948355e-07, "loss": 0.4312, "step": 1230 }, { "epoch": 0.1647043082686647, "grad_norm": 25.315393447875977, "learning_rate": 8.352956917313352e-07, "loss": 0.5132, "step": 1231 }, { "epoch": 0.16483810543216484, "grad_norm": 27.28465461730957, "learning_rate": 8.351618945678351e-07, "loss": 0.7335, "step": 1232 }, { "epoch": 0.16497190259566497, "grad_norm": 12.462003707885742, "learning_rate": 8.35028097404335e-07, "loss": 0.3312, "step": 1233 }, { "epoch": 0.16510569975916511, "grad_norm": 12.181848526000977, "learning_rate": 8.348943002408349e-07, "loss": 0.3174, "step": 1234 }, { "epoch": 0.16523949692266524, "grad_norm": 10.845577239990234, "learning_rate": 8.347605030773347e-07, "loss": 0.4318, "step": 1235 }, { "epoch": 0.16537329408616538, "grad_norm": 45.299007415771484, "learning_rate": 8.346267059138345e-07, "loss": 0.6402, "step": 1236 }, { "epoch": 0.1655070912496655, "grad_norm": 7.354672431945801, "learning_rate": 8.344929087503344e-07, "loss": 0.341, "step": 1237 }, { "epoch": 0.16564088841316565, "grad_norm": 26.365955352783203, "learning_rate": 8.343591115868344e-07, "loss": 0.3831, "step": 1238 }, { "epoch": 0.16577468557666578, "grad_norm": 12.851924896240234, "learning_rate": 8.342253144233342e-07, "loss": 0.4338, "step": 1239 }, { "epoch": 0.1659084827401659, "grad_norm": 9.319581985473633, "learning_rate": 8.34091517259834e-07, "loss": 0.3211, "step": 1240 }, { "epoch": 0.16604227990366605, "grad_norm": 16.002864837646484, "learning_rate": 8.339577200963339e-07, "loss": 0.4527, "step": 1241 }, { "epoch": 0.16617607706716617, "grad_norm": 14.111757278442383, "learning_rate": 8.338239229328338e-07, "loss": 0.3522, "step": 1242 }, { "epoch": 0.16630987423066632, "grad_norm": 22.0786190032959, "learning_rate": 8.336901257693336e-07, "loss": 0.5146, "step": 1243 }, { "epoch": 0.16644367139416644, "grad_norm": 25.836441040039062, "learning_rate": 8.335563286058335e-07, "loss": 0.7433, "step": 1244 }, { "epoch": 0.16657746855766659, "grad_norm": 35.43021774291992, "learning_rate": 8.334225314423334e-07, "loss": 0.6278, "step": 1245 }, { "epoch": 0.1667112657211667, "grad_norm": 15.22924518585205, "learning_rate": 8.332887342788333e-07, "loss": 0.5091, "step": 1246 }, { "epoch": 0.16684506288466686, "grad_norm": 28.820348739624023, "learning_rate": 8.331549371153331e-07, "loss": 0.5596, "step": 1247 }, { "epoch": 0.16697886004816698, "grad_norm": 8.716984748840332, "learning_rate": 8.33021139951833e-07, "loss": 0.329, "step": 1248 }, { "epoch": 0.16711265721166713, "grad_norm": 25.061199188232422, "learning_rate": 8.328873427883329e-07, "loss": 0.675, "step": 1249 }, { "epoch": 0.16724645437516725, "grad_norm": 9.885146141052246, "learning_rate": 8.327535456248327e-07, "loss": 0.4011, "step": 1250 }, { "epoch": 0.16738025153866737, "grad_norm": 13.676265716552734, "learning_rate": 8.326197484613325e-07, "loss": 0.4247, "step": 1251 }, { "epoch": 0.16751404870216752, "grad_norm": 24.87615394592285, "learning_rate": 8.324859512978325e-07, "loss": 0.7431, "step": 1252 }, { "epoch": 0.16764784586566764, "grad_norm": 14.36215591430664, "learning_rate": 8.323521541343324e-07, "loss": 0.669, "step": 1253 }, { "epoch": 0.1677816430291678, "grad_norm": 48.733917236328125, "learning_rate": 8.322183569708323e-07, "loss": 0.5174, "step": 1254 }, { "epoch": 0.1679154401926679, "grad_norm": 47.73424530029297, "learning_rate": 8.32084559807332e-07, "loss": 0.4774, "step": 1255 }, { "epoch": 0.16804923735616806, "grad_norm": 37.06236267089844, "learning_rate": 8.319507626438319e-07, "loss": 0.4878, "step": 1256 }, { "epoch": 0.16818303451966818, "grad_norm": 17.778940200805664, "learning_rate": 8.318169654803318e-07, "loss": 0.5482, "step": 1257 }, { "epoch": 0.16831683168316833, "grad_norm": 32.56419372558594, "learning_rate": 8.316831683168316e-07, "loss": 0.4851, "step": 1258 }, { "epoch": 0.16845062884666845, "grad_norm": 27.7596378326416, "learning_rate": 8.315493711533314e-07, "loss": 0.4589, "step": 1259 }, { "epoch": 0.1685844260101686, "grad_norm": 51.447330474853516, "learning_rate": 8.314155739898314e-07, "loss": 0.6208, "step": 1260 }, { "epoch": 0.16871822317366872, "grad_norm": 11.954805374145508, "learning_rate": 8.312817768263313e-07, "loss": 0.4053, "step": 1261 }, { "epoch": 0.16885202033716884, "grad_norm": 39.65531539916992, "learning_rate": 8.311479796628311e-07, "loss": 0.4385, "step": 1262 }, { "epoch": 0.168985817500669, "grad_norm": 37.66021728515625, "learning_rate": 8.31014182499331e-07, "loss": 0.3063, "step": 1263 }, { "epoch": 0.1691196146641691, "grad_norm": 31.019758224487305, "learning_rate": 8.308803853358308e-07, "loss": 0.6143, "step": 1264 }, { "epoch": 0.16925341182766926, "grad_norm": 9.83707332611084, "learning_rate": 8.307465881723307e-07, "loss": 0.2821, "step": 1265 }, { "epoch": 0.16938720899116938, "grad_norm": 18.14883804321289, "learning_rate": 8.306127910088305e-07, "loss": 0.4935, "step": 1266 }, { "epoch": 0.16952100615466953, "grad_norm": 13.94038200378418, "learning_rate": 8.304789938453305e-07, "loss": 0.4382, "step": 1267 }, { "epoch": 0.16965480331816965, "grad_norm": 15.667008399963379, "learning_rate": 8.303451966818304e-07, "loss": 0.5133, "step": 1268 }, { "epoch": 0.1697886004816698, "grad_norm": 16.529464721679688, "learning_rate": 8.302113995183302e-07, "loss": 0.403, "step": 1269 }, { "epoch": 0.16992239764516992, "grad_norm": 16.495519638061523, "learning_rate": 8.3007760235483e-07, "loss": 0.5365, "step": 1270 }, { "epoch": 0.17005619480867007, "grad_norm": 22.718257904052734, "learning_rate": 8.299438051913299e-07, "loss": 0.3479, "step": 1271 }, { "epoch": 0.1701899919721702, "grad_norm": 7.401376247406006, "learning_rate": 8.298100080278298e-07, "loss": 0.4194, "step": 1272 }, { "epoch": 0.1703237891356703, "grad_norm": 22.606517791748047, "learning_rate": 8.296762108643296e-07, "loss": 0.4094, "step": 1273 }, { "epoch": 0.17045758629917046, "grad_norm": 11.666610717773438, "learning_rate": 8.295424137008295e-07, "loss": 0.5459, "step": 1274 }, { "epoch": 0.17059138346267058, "grad_norm": 20.465049743652344, "learning_rate": 8.294086165373294e-07, "loss": 0.5369, "step": 1275 }, { "epoch": 0.17072518062617073, "grad_norm": 16.057559967041016, "learning_rate": 8.292748193738293e-07, "loss": 0.5478, "step": 1276 }, { "epoch": 0.17085897778967085, "grad_norm": 16.71184730529785, "learning_rate": 8.291410222103292e-07, "loss": 0.3673, "step": 1277 }, { "epoch": 0.170992774953171, "grad_norm": 11.147367477416992, "learning_rate": 8.290072250468289e-07, "loss": 0.4973, "step": 1278 }, { "epoch": 0.17112657211667112, "grad_norm": 29.759620666503906, "learning_rate": 8.288734278833288e-07, "loss": 0.404, "step": 1279 }, { "epoch": 0.17126036928017127, "grad_norm": 11.533368110656738, "learning_rate": 8.287396307198287e-07, "loss": 0.5713, "step": 1280 }, { "epoch": 0.1713941664436714, "grad_norm": 10.25835132598877, "learning_rate": 8.286058335563285e-07, "loss": 0.5058, "step": 1281 }, { "epoch": 0.17152796360717154, "grad_norm": 9.598968505859375, "learning_rate": 8.284720363928285e-07, "loss": 0.3868, "step": 1282 }, { "epoch": 0.17166176077067166, "grad_norm": 20.36260414123535, "learning_rate": 8.283382392293283e-07, "loss": 0.5213, "step": 1283 }, { "epoch": 0.17179555793417178, "grad_norm": 17.926895141601562, "learning_rate": 8.282044420658282e-07, "loss": 0.3535, "step": 1284 }, { "epoch": 0.17192935509767193, "grad_norm": 6.58014440536499, "learning_rate": 8.28070644902328e-07, "loss": 0.3613, "step": 1285 }, { "epoch": 0.17206315226117205, "grad_norm": 10.830123901367188, "learning_rate": 8.279368477388279e-07, "loss": 0.6094, "step": 1286 }, { "epoch": 0.1721969494246722, "grad_norm": 11.114612579345703, "learning_rate": 8.278030505753277e-07, "loss": 0.3668, "step": 1287 }, { "epoch": 0.17233074658817232, "grad_norm": 11.912664413452148, "learning_rate": 8.276692534118276e-07, "loss": 0.4027, "step": 1288 }, { "epoch": 0.17246454375167247, "grad_norm": 26.8795223236084, "learning_rate": 8.275354562483275e-07, "loss": 0.615, "step": 1289 }, { "epoch": 0.1725983409151726, "grad_norm": 24.130773544311523, "learning_rate": 8.274016590848274e-07, "loss": 0.3728, "step": 1290 }, { "epoch": 0.17273213807867274, "grad_norm": 18.41604232788086, "learning_rate": 8.272678619213273e-07, "loss": 0.4275, "step": 1291 }, { "epoch": 0.17286593524217286, "grad_norm": 18.517688751220703, "learning_rate": 8.271340647578271e-07, "loss": 0.3713, "step": 1292 }, { "epoch": 0.172999732405673, "grad_norm": 7.809110164642334, "learning_rate": 8.270002675943269e-07, "loss": 0.4426, "step": 1293 }, { "epoch": 0.17313352956917313, "grad_norm": 20.331743240356445, "learning_rate": 8.268664704308268e-07, "loss": 0.3548, "step": 1294 }, { "epoch": 0.17326732673267325, "grad_norm": 33.787418365478516, "learning_rate": 8.267326732673267e-07, "loss": 0.5799, "step": 1295 }, { "epoch": 0.1734011238961734, "grad_norm": 12.619297981262207, "learning_rate": 8.265988761038267e-07, "loss": 0.2911, "step": 1296 }, { "epoch": 0.17353492105967352, "grad_norm": 8.109816551208496, "learning_rate": 8.264650789403264e-07, "loss": 0.3855, "step": 1297 }, { "epoch": 0.17366871822317367, "grad_norm": 12.094438552856445, "learning_rate": 8.263312817768263e-07, "loss": 0.2876, "step": 1298 }, { "epoch": 0.1738025153866738, "grad_norm": 21.975374221801758, "learning_rate": 8.261974846133262e-07, "loss": 0.4437, "step": 1299 }, { "epoch": 0.17393631255017394, "grad_norm": 9.616321563720703, "learning_rate": 8.260636874498261e-07, "loss": 0.2912, "step": 1300 }, { "epoch": 0.17407010971367406, "grad_norm": 12.022823333740234, "learning_rate": 8.259298902863258e-07, "loss": 0.3932, "step": 1301 }, { "epoch": 0.1742039068771742, "grad_norm": 9.577276229858398, "learning_rate": 8.257960931228257e-07, "loss": 0.4043, "step": 1302 }, { "epoch": 0.17433770404067434, "grad_norm": 25.126062393188477, "learning_rate": 8.256622959593256e-07, "loss": 0.4556, "step": 1303 }, { "epoch": 0.17447150120417448, "grad_norm": 20.478534698486328, "learning_rate": 8.255284987958256e-07, "loss": 0.6893, "step": 1304 }, { "epoch": 0.1746052983676746, "grad_norm": 25.433347702026367, "learning_rate": 8.253947016323254e-07, "loss": 0.6345, "step": 1305 }, { "epoch": 0.17473909553117473, "grad_norm": 13.325934410095215, "learning_rate": 8.252609044688252e-07, "loss": 0.4642, "step": 1306 }, { "epoch": 0.17487289269467488, "grad_norm": 10.705042839050293, "learning_rate": 8.251271073053251e-07, "loss": 0.5208, "step": 1307 }, { "epoch": 0.175006689858175, "grad_norm": 16.893617630004883, "learning_rate": 8.249933101418249e-07, "loss": 0.5292, "step": 1308 }, { "epoch": 0.17514048702167515, "grad_norm": 15.327486038208008, "learning_rate": 8.248595129783248e-07, "loss": 0.4774, "step": 1309 }, { "epoch": 0.17527428418517527, "grad_norm": 20.085657119750977, "learning_rate": 8.247257158148246e-07, "loss": 0.5187, "step": 1310 }, { "epoch": 0.17540808134867542, "grad_norm": 10.90278434753418, "learning_rate": 8.245919186513246e-07, "loss": 0.4753, "step": 1311 }, { "epoch": 0.17554187851217554, "grad_norm": 36.67424392700195, "learning_rate": 8.244581214878244e-07, "loss": 0.4286, "step": 1312 }, { "epoch": 0.17567567567567569, "grad_norm": 50.140342712402344, "learning_rate": 8.243243243243243e-07, "loss": 0.6445, "step": 1313 }, { "epoch": 0.1758094728391758, "grad_norm": 11.505818367004395, "learning_rate": 8.241905271608242e-07, "loss": 0.7175, "step": 1314 }, { "epoch": 0.17594327000267596, "grad_norm": 25.252897262573242, "learning_rate": 8.24056729997324e-07, "loss": 0.5514, "step": 1315 }, { "epoch": 0.17607706716617608, "grad_norm": 31.716211318969727, "learning_rate": 8.239229328338238e-07, "loss": 0.3677, "step": 1316 }, { "epoch": 0.1762108643296762, "grad_norm": 43.01708221435547, "learning_rate": 8.237891356703237e-07, "loss": 0.3319, "step": 1317 }, { "epoch": 0.17634466149317635, "grad_norm": 27.033306121826172, "learning_rate": 8.236553385068237e-07, "loss": 0.5939, "step": 1318 }, { "epoch": 0.17647845865667647, "grad_norm": 21.385400772094727, "learning_rate": 8.235215413433236e-07, "loss": 0.4841, "step": 1319 }, { "epoch": 0.17661225582017662, "grad_norm": 28.517467498779297, "learning_rate": 8.233877441798233e-07, "loss": 0.4857, "step": 1320 }, { "epoch": 0.17674605298367674, "grad_norm": 12.941689491271973, "learning_rate": 8.232539470163232e-07, "loss": 0.4697, "step": 1321 }, { "epoch": 0.1768798501471769, "grad_norm": 8.665266036987305, "learning_rate": 8.231201498528231e-07, "loss": 0.4428, "step": 1322 }, { "epoch": 0.177013647310677, "grad_norm": 22.820016860961914, "learning_rate": 8.22986352689323e-07, "loss": 0.5499, "step": 1323 }, { "epoch": 0.17714744447417716, "grad_norm": 15.791969299316406, "learning_rate": 8.228525555258227e-07, "loss": 0.3936, "step": 1324 }, { "epoch": 0.17728124163767728, "grad_norm": 6.150784969329834, "learning_rate": 8.227187583623226e-07, "loss": 0.3775, "step": 1325 }, { "epoch": 0.17741503880117743, "grad_norm": 11.986197471618652, "learning_rate": 8.225849611988226e-07, "loss": 0.3302, "step": 1326 }, { "epoch": 0.17754883596467755, "grad_norm": 38.63036346435547, "learning_rate": 8.224511640353225e-07, "loss": 0.511, "step": 1327 }, { "epoch": 0.17768263312817767, "grad_norm": 31.263273239135742, "learning_rate": 8.223173668718223e-07, "loss": 0.5693, "step": 1328 }, { "epoch": 0.17781643029167782, "grad_norm": 15.055012702941895, "learning_rate": 8.221835697083221e-07, "loss": 0.2838, "step": 1329 }, { "epoch": 0.17795022745517794, "grad_norm": 13.048839569091797, "learning_rate": 8.22049772544822e-07, "loss": 0.4048, "step": 1330 }, { "epoch": 0.1780840246186781, "grad_norm": 15.289095878601074, "learning_rate": 8.219159753813219e-07, "loss": 0.3716, "step": 1331 }, { "epoch": 0.1782178217821782, "grad_norm": 14.137447357177734, "learning_rate": 8.217821782178217e-07, "loss": 0.3637, "step": 1332 }, { "epoch": 0.17835161894567836, "grad_norm": 25.65001106262207, "learning_rate": 8.216483810543217e-07, "loss": 0.7446, "step": 1333 }, { "epoch": 0.17848541610917848, "grad_norm": 39.53636932373047, "learning_rate": 8.215145838908215e-07, "loss": 0.4767, "step": 1334 }, { "epoch": 0.17861921327267863, "grad_norm": 34.105567932128906, "learning_rate": 8.213807867273213e-07, "loss": 0.7779, "step": 1335 }, { "epoch": 0.17875301043617875, "grad_norm": 12.028942108154297, "learning_rate": 8.212469895638212e-07, "loss": 0.3927, "step": 1336 }, { "epoch": 0.1788868075996789, "grad_norm": 20.95892906188965, "learning_rate": 8.211131924003211e-07, "loss": 0.1698, "step": 1337 }, { "epoch": 0.17902060476317902, "grad_norm": 20.529605865478516, "learning_rate": 8.20979395236821e-07, "loss": 0.5412, "step": 1338 }, { "epoch": 0.17915440192667914, "grad_norm": 44.151309967041016, "learning_rate": 8.208455980733207e-07, "loss": 0.5476, "step": 1339 }, { "epoch": 0.1792881990901793, "grad_norm": 17.265583038330078, "learning_rate": 8.207118009098207e-07, "loss": 0.6306, "step": 1340 }, { "epoch": 0.1794219962536794, "grad_norm": 21.290508270263672, "learning_rate": 8.205780037463206e-07, "loss": 0.4098, "step": 1341 }, { "epoch": 0.17955579341717956, "grad_norm": 14.437223434448242, "learning_rate": 8.204442065828205e-07, "loss": 0.5154, "step": 1342 }, { "epoch": 0.17968959058067968, "grad_norm": 32.93992233276367, "learning_rate": 8.203104094193202e-07, "loss": 0.3462, "step": 1343 }, { "epoch": 0.17982338774417983, "grad_norm": 20.34821891784668, "learning_rate": 8.201766122558201e-07, "loss": 0.4584, "step": 1344 }, { "epoch": 0.17995718490767995, "grad_norm": 30.394454956054688, "learning_rate": 8.2004281509232e-07, "loss": 0.4003, "step": 1345 }, { "epoch": 0.1800909820711801, "grad_norm": 15.374349594116211, "learning_rate": 8.199090179288199e-07, "loss": 0.3689, "step": 1346 }, { "epoch": 0.18022477923468022, "grad_norm": 36.22574996948242, "learning_rate": 8.197752207653196e-07, "loss": 0.5021, "step": 1347 }, { "epoch": 0.18035857639818037, "grad_norm": 15.654786109924316, "learning_rate": 8.196414236018196e-07, "loss": 0.277, "step": 1348 }, { "epoch": 0.1804923735616805, "grad_norm": 22.989919662475586, "learning_rate": 8.195076264383195e-07, "loss": 0.5461, "step": 1349 }, { "epoch": 0.1806261707251806, "grad_norm": 12.258338928222656, "learning_rate": 8.193738292748194e-07, "loss": 0.3034, "step": 1350 }, { "epoch": 0.18075996788868076, "grad_norm": 24.079668045043945, "learning_rate": 8.192400321113192e-07, "loss": 0.3875, "step": 1351 }, { "epoch": 0.18089376505218088, "grad_norm": 26.55644416809082, "learning_rate": 8.19106234947819e-07, "loss": 0.2882, "step": 1352 }, { "epoch": 0.18102756221568103, "grad_norm": 23.90664291381836, "learning_rate": 8.189724377843189e-07, "loss": 0.5084, "step": 1353 }, { "epoch": 0.18116135937918115, "grad_norm": 15.42706298828125, "learning_rate": 8.188386406208188e-07, "loss": 0.4306, "step": 1354 }, { "epoch": 0.1812951565426813, "grad_norm": 20.856937408447266, "learning_rate": 8.187048434573187e-07, "loss": 0.6358, "step": 1355 }, { "epoch": 0.18142895370618142, "grad_norm": 14.300140380859375, "learning_rate": 8.185710462938186e-07, "loss": 0.4571, "step": 1356 }, { "epoch": 0.18156275086968157, "grad_norm": 26.26179313659668, "learning_rate": 8.184372491303184e-07, "loss": 0.4423, "step": 1357 }, { "epoch": 0.1816965480331817, "grad_norm": 17.437664031982422, "learning_rate": 8.183034519668183e-07, "loss": 0.4713, "step": 1358 }, { "epoch": 0.18183034519668184, "grad_norm": 12.989599227905273, "learning_rate": 8.181696548033181e-07, "loss": 0.4504, "step": 1359 }, { "epoch": 0.18196414236018196, "grad_norm": 13.15501594543457, "learning_rate": 8.18035857639818e-07, "loss": 0.4792, "step": 1360 }, { "epoch": 0.18209793952368208, "grad_norm": 12.045741081237793, "learning_rate": 8.179020604763179e-07, "loss": 0.4743, "step": 1361 }, { "epoch": 0.18223173668718223, "grad_norm": 12.916608810424805, "learning_rate": 8.177682633128177e-07, "loss": 0.3453, "step": 1362 }, { "epoch": 0.18236553385068235, "grad_norm": 48.19664001464844, "learning_rate": 8.176344661493176e-07, "loss": 0.6907, "step": 1363 }, { "epoch": 0.1824993310141825, "grad_norm": 16.68502426147461, "learning_rate": 8.175006689858175e-07, "loss": 0.47, "step": 1364 }, { "epoch": 0.18263312817768262, "grad_norm": 15.0518159866333, "learning_rate": 8.173668718223174e-07, "loss": 0.3898, "step": 1365 }, { "epoch": 0.18276692534118277, "grad_norm": 11.839590072631836, "learning_rate": 8.172330746588171e-07, "loss": 0.4951, "step": 1366 }, { "epoch": 0.1829007225046829, "grad_norm": 12.780031204223633, "learning_rate": 8.17099277495317e-07, "loss": 0.368, "step": 1367 }, { "epoch": 0.18303451966818304, "grad_norm": 17.049280166625977, "learning_rate": 8.169654803318169e-07, "loss": 0.6605, "step": 1368 }, { "epoch": 0.18316831683168316, "grad_norm": 21.889921188354492, "learning_rate": 8.168316831683168e-07, "loss": 0.5667, "step": 1369 }, { "epoch": 0.1833021139951833, "grad_norm": 32.23612594604492, "learning_rate": 8.166978860048167e-07, "loss": 0.3399, "step": 1370 }, { "epoch": 0.18343591115868343, "grad_norm": 10.013663291931152, "learning_rate": 8.165640888413165e-07, "loss": 0.5183, "step": 1371 }, { "epoch": 0.18356970832218358, "grad_norm": 17.9320011138916, "learning_rate": 8.164302916778164e-07, "loss": 0.5854, "step": 1372 }, { "epoch": 0.1837035054856837, "grad_norm": 11.336955070495605, "learning_rate": 8.162964945143163e-07, "loss": 0.5305, "step": 1373 }, { "epoch": 0.18383730264918383, "grad_norm": 41.20369338989258, "learning_rate": 8.161626973508161e-07, "loss": 0.3593, "step": 1374 }, { "epoch": 0.18397109981268397, "grad_norm": 16.955686569213867, "learning_rate": 8.16028900187316e-07, "loss": 0.4127, "step": 1375 }, { "epoch": 0.1841048969761841, "grad_norm": 27.173725128173828, "learning_rate": 8.158951030238158e-07, "loss": 0.3778, "step": 1376 }, { "epoch": 0.18423869413968424, "grad_norm": 14.843877792358398, "learning_rate": 8.157613058603158e-07, "loss": 0.2962, "step": 1377 }, { "epoch": 0.18437249130318437, "grad_norm": 20.548770904541016, "learning_rate": 8.156275086968156e-07, "loss": 0.1887, "step": 1378 }, { "epoch": 0.18450628846668451, "grad_norm": 8.469454765319824, "learning_rate": 8.154937115333155e-07, "loss": 0.4358, "step": 1379 }, { "epoch": 0.18464008563018464, "grad_norm": 43.91693115234375, "learning_rate": 8.153599143698153e-07, "loss": 0.7074, "step": 1380 }, { "epoch": 0.18477388279368478, "grad_norm": 6.329970359802246, "learning_rate": 8.152261172063152e-07, "loss": 0.3033, "step": 1381 }, { "epoch": 0.1849076799571849, "grad_norm": 43.003700256347656, "learning_rate": 8.15092320042815e-07, "loss": 0.6533, "step": 1382 }, { "epoch": 0.18504147712068506, "grad_norm": 7.811152935028076, "learning_rate": 8.149585228793149e-07, "loss": 0.3721, "step": 1383 }, { "epoch": 0.18517527428418518, "grad_norm": 24.253189086914062, "learning_rate": 8.148247257158149e-07, "loss": 0.5675, "step": 1384 }, { "epoch": 0.1853090714476853, "grad_norm": 5.3288140296936035, "learning_rate": 8.146909285523147e-07, "loss": 0.2974, "step": 1385 }, { "epoch": 0.18544286861118545, "grad_norm": 17.06401252746582, "learning_rate": 8.145571313888145e-07, "loss": 0.4235, "step": 1386 }, { "epoch": 0.18557666577468557, "grad_norm": 5.2954535484313965, "learning_rate": 8.144233342253144e-07, "loss": 0.3213, "step": 1387 }, { "epoch": 0.18571046293818572, "grad_norm": 16.538074493408203, "learning_rate": 8.142895370618143e-07, "loss": 0.4877, "step": 1388 }, { "epoch": 0.18584426010168584, "grad_norm": 33.520484924316406, "learning_rate": 8.14155739898314e-07, "loss": 0.7166, "step": 1389 }, { "epoch": 0.185978057265186, "grad_norm": 5.131949424743652, "learning_rate": 8.140219427348139e-07, "loss": 0.362, "step": 1390 }, { "epoch": 0.1861118544286861, "grad_norm": 46.31850051879883, "learning_rate": 8.138881455713138e-07, "loss": 0.6241, "step": 1391 }, { "epoch": 0.18624565159218626, "grad_norm": 17.40606117248535, "learning_rate": 8.137543484078138e-07, "loss": 0.4117, "step": 1392 }, { "epoch": 0.18637944875568638, "grad_norm": 17.91387939453125, "learning_rate": 8.136205512443136e-07, "loss": 0.5208, "step": 1393 }, { "epoch": 0.18651324591918653, "grad_norm": 6.879427909851074, "learning_rate": 8.134867540808134e-07, "loss": 0.4991, "step": 1394 }, { "epoch": 0.18664704308268665, "grad_norm": 18.92538833618164, "learning_rate": 8.133529569173133e-07, "loss": 0.5341, "step": 1395 }, { "epoch": 0.18678084024618677, "grad_norm": 32.1536750793457, "learning_rate": 8.132191597538132e-07, "loss": 0.6941, "step": 1396 }, { "epoch": 0.18691463740968692, "grad_norm": 7.7171630859375, "learning_rate": 8.13085362590313e-07, "loss": 0.5815, "step": 1397 }, { "epoch": 0.18704843457318704, "grad_norm": 31.2913875579834, "learning_rate": 8.129515654268129e-07, "loss": 0.618, "step": 1398 }, { "epoch": 0.1871822317366872, "grad_norm": 40.75034713745117, "learning_rate": 8.128177682633128e-07, "loss": 0.5664, "step": 1399 }, { "epoch": 0.1873160289001873, "grad_norm": 14.882246971130371, "learning_rate": 8.126839710998127e-07, "loss": 0.6083, "step": 1400 }, { "epoch": 0.18744982606368746, "grad_norm": 15.299356460571289, "learning_rate": 8.125501739363125e-07, "loss": 0.5686, "step": 1401 }, { "epoch": 0.18758362322718758, "grad_norm": 39.747955322265625, "learning_rate": 8.124163767728124e-07, "loss": 0.5375, "step": 1402 }, { "epoch": 0.18771742039068773, "grad_norm": 33.106407165527344, "learning_rate": 8.122825796093123e-07, "loss": 0.3888, "step": 1403 }, { "epoch": 0.18785121755418785, "grad_norm": 13.878360748291016, "learning_rate": 8.121487824458121e-07, "loss": 0.4152, "step": 1404 }, { "epoch": 0.187985014717688, "grad_norm": 13.188817024230957, "learning_rate": 8.120149852823119e-07, "loss": 0.4803, "step": 1405 }, { "epoch": 0.18811881188118812, "grad_norm": 7.532507419586182, "learning_rate": 8.118811881188119e-07, "loss": 0.3598, "step": 1406 }, { "epoch": 0.18825260904468824, "grad_norm": 13.004508018493652, "learning_rate": 8.117473909553118e-07, "loss": 0.479, "step": 1407 }, { "epoch": 0.1883864062081884, "grad_norm": 24.47088050842285, "learning_rate": 8.116135937918117e-07, "loss": 0.388, "step": 1408 }, { "epoch": 0.1885202033716885, "grad_norm": 36.684844970703125, "learning_rate": 8.114797966283114e-07, "loss": 0.6439, "step": 1409 }, { "epoch": 0.18865400053518866, "grad_norm": 18.6260929107666, "learning_rate": 8.113459994648113e-07, "loss": 0.3052, "step": 1410 }, { "epoch": 0.18878779769868878, "grad_norm": 9.025012969970703, "learning_rate": 8.112122023013112e-07, "loss": 0.3844, "step": 1411 }, { "epoch": 0.18892159486218893, "grad_norm": 22.89337730407715, "learning_rate": 8.11078405137811e-07, "loss": 0.3062, "step": 1412 }, { "epoch": 0.18905539202568905, "grad_norm": 11.188862800598145, "learning_rate": 8.109446079743108e-07, "loss": 0.4594, "step": 1413 }, { "epoch": 0.1891891891891892, "grad_norm": 8.631292343139648, "learning_rate": 8.108108108108108e-07, "loss": 0.4487, "step": 1414 }, { "epoch": 0.18932298635268932, "grad_norm": 12.80964469909668, "learning_rate": 8.106770136473107e-07, "loss": 0.3453, "step": 1415 }, { "epoch": 0.18945678351618947, "grad_norm": 26.239086151123047, "learning_rate": 8.105432164838105e-07, "loss": 0.6186, "step": 1416 }, { "epoch": 0.1895905806796896, "grad_norm": 19.686962127685547, "learning_rate": 8.104094193203104e-07, "loss": 0.405, "step": 1417 }, { "epoch": 0.1897243778431897, "grad_norm": 29.06103515625, "learning_rate": 8.102756221568102e-07, "loss": 0.4319, "step": 1418 }, { "epoch": 0.18985817500668986, "grad_norm": 10.697157859802246, "learning_rate": 8.101418249933101e-07, "loss": 0.4596, "step": 1419 }, { "epoch": 0.18999197217018998, "grad_norm": 11.397748947143555, "learning_rate": 8.100080278298099e-07, "loss": 0.3961, "step": 1420 }, { "epoch": 0.19012576933369013, "grad_norm": 21.661685943603516, "learning_rate": 8.098742306663099e-07, "loss": 0.7012, "step": 1421 }, { "epoch": 0.19025956649719025, "grad_norm": 12.07683277130127, "learning_rate": 8.097404335028097e-07, "loss": 0.5602, "step": 1422 }, { "epoch": 0.1903933636606904, "grad_norm": 15.901124000549316, "learning_rate": 8.096066363393096e-07, "loss": 0.2761, "step": 1423 }, { "epoch": 0.19052716082419052, "grad_norm": 15.013309478759766, "learning_rate": 8.094728391758094e-07, "loss": 0.5123, "step": 1424 }, { "epoch": 0.19066095798769067, "grad_norm": 22.71619415283203, "learning_rate": 8.093390420123093e-07, "loss": 0.3463, "step": 1425 }, { "epoch": 0.1907947551511908, "grad_norm": 11.95036506652832, "learning_rate": 8.092052448488092e-07, "loss": 0.4115, "step": 1426 }, { "epoch": 0.19092855231469094, "grad_norm": 22.520442962646484, "learning_rate": 8.09071447685309e-07, "loss": 0.6545, "step": 1427 }, { "epoch": 0.19106234947819106, "grad_norm": 14.391231536865234, "learning_rate": 8.089376505218089e-07, "loss": 0.4055, "step": 1428 }, { "epoch": 0.19119614664169118, "grad_norm": 30.867713928222656, "learning_rate": 8.088038533583088e-07, "loss": 0.4611, "step": 1429 }, { "epoch": 0.19132994380519133, "grad_norm": 17.21769142150879, "learning_rate": 8.086700561948087e-07, "loss": 0.3409, "step": 1430 }, { "epoch": 0.19146374096869145, "grad_norm": 19.02610969543457, "learning_rate": 8.085362590313086e-07, "loss": 0.5508, "step": 1431 }, { "epoch": 0.1915975381321916, "grad_norm": 14.466287612915039, "learning_rate": 8.084024618678083e-07, "loss": 0.5141, "step": 1432 }, { "epoch": 0.19173133529569172, "grad_norm": 11.198751449584961, "learning_rate": 8.082686647043082e-07, "loss": 0.5033, "step": 1433 }, { "epoch": 0.19186513245919187, "grad_norm": 9.432311058044434, "learning_rate": 8.081348675408081e-07, "loss": 0.2589, "step": 1434 }, { "epoch": 0.191998929622692, "grad_norm": 17.351476669311523, "learning_rate": 8.080010703773079e-07, "loss": 0.4266, "step": 1435 }, { "epoch": 0.19213272678619214, "grad_norm": 14.179300308227539, "learning_rate": 8.078672732138078e-07, "loss": 0.5594, "step": 1436 }, { "epoch": 0.19226652394969226, "grad_norm": 21.257976531982422, "learning_rate": 8.077334760503077e-07, "loss": 0.4362, "step": 1437 }, { "epoch": 0.1924003211131924, "grad_norm": 9.64099407196045, "learning_rate": 8.075996788868076e-07, "loss": 0.3837, "step": 1438 }, { "epoch": 0.19253411827669253, "grad_norm": 15.786162376403809, "learning_rate": 8.074658817233074e-07, "loss": 0.5022, "step": 1439 }, { "epoch": 0.19266791544019266, "grad_norm": 9.024051666259766, "learning_rate": 8.073320845598073e-07, "loss": 0.3743, "step": 1440 }, { "epoch": 0.1928017126036928, "grad_norm": 28.77605438232422, "learning_rate": 8.071982873963071e-07, "loss": 0.258, "step": 1441 }, { "epoch": 0.19293550976719293, "grad_norm": 10.622001647949219, "learning_rate": 8.07064490232807e-07, "loss": 0.3982, "step": 1442 }, { "epoch": 0.19306930693069307, "grad_norm": 14.485406875610352, "learning_rate": 8.069306930693069e-07, "loss": 0.4128, "step": 1443 }, { "epoch": 0.1932031040941932, "grad_norm": 9.346256256103516, "learning_rate": 8.067968959058068e-07, "loss": 0.4797, "step": 1444 }, { "epoch": 0.19333690125769334, "grad_norm": 34.70783233642578, "learning_rate": 8.066630987423067e-07, "loss": 0.6698, "step": 1445 }, { "epoch": 0.19347069842119347, "grad_norm": 18.317760467529297, "learning_rate": 8.065293015788065e-07, "loss": 0.4306, "step": 1446 }, { "epoch": 0.19360449558469361, "grad_norm": 15.998642921447754, "learning_rate": 8.063955044153063e-07, "loss": 0.4988, "step": 1447 }, { "epoch": 0.19373829274819374, "grad_norm": 10.536349296569824, "learning_rate": 8.062617072518062e-07, "loss": 0.4248, "step": 1448 }, { "epoch": 0.19387208991169388, "grad_norm": 8.963170051574707, "learning_rate": 8.061279100883061e-07, "loss": 0.5885, "step": 1449 }, { "epoch": 0.194005887075194, "grad_norm": 11.10763931274414, "learning_rate": 8.059941129248059e-07, "loss": 0.4317, "step": 1450 }, { "epoch": 0.19413968423869413, "grad_norm": 48.86983871459961, "learning_rate": 8.058603157613058e-07, "loss": 0.7894, "step": 1451 }, { "epoch": 0.19427348140219428, "grad_norm": 14.692670822143555, "learning_rate": 8.057265185978057e-07, "loss": 0.6018, "step": 1452 }, { "epoch": 0.1944072785656944, "grad_norm": 13.442156791687012, "learning_rate": 8.055927214343056e-07, "loss": 0.5283, "step": 1453 }, { "epoch": 0.19454107572919455, "grad_norm": 16.570581436157227, "learning_rate": 8.054589242708055e-07, "loss": 0.422, "step": 1454 }, { "epoch": 0.19467487289269467, "grad_norm": 27.268524169921875, "learning_rate": 8.053251271073052e-07, "loss": 0.5394, "step": 1455 }, { "epoch": 0.19480867005619482, "grad_norm": 18.236921310424805, "learning_rate": 8.051913299438051e-07, "loss": 0.4361, "step": 1456 }, { "epoch": 0.19494246721969494, "grad_norm": 26.475467681884766, "learning_rate": 8.05057532780305e-07, "loss": 0.4785, "step": 1457 }, { "epoch": 0.19507626438319509, "grad_norm": 21.552885055541992, "learning_rate": 8.04923735616805e-07, "loss": 0.3192, "step": 1458 }, { "epoch": 0.1952100615466952, "grad_norm": 15.847596168518066, "learning_rate": 8.047899384533048e-07, "loss": 0.4794, "step": 1459 }, { "epoch": 0.19534385871019536, "grad_norm": 30.98371696472168, "learning_rate": 8.046561412898046e-07, "loss": 0.726, "step": 1460 }, { "epoch": 0.19547765587369548, "grad_norm": 32.4969596862793, "learning_rate": 8.045223441263045e-07, "loss": 0.4292, "step": 1461 }, { "epoch": 0.1956114530371956, "grad_norm": 21.511247634887695, "learning_rate": 8.043885469628044e-07, "loss": 0.4119, "step": 1462 }, { "epoch": 0.19574525020069575, "grad_norm": 9.826741218566895, "learning_rate": 8.042547497993042e-07, "loss": 0.395, "step": 1463 }, { "epoch": 0.19587904736419587, "grad_norm": 8.847390174865723, "learning_rate": 8.04120952635804e-07, "loss": 0.5288, "step": 1464 }, { "epoch": 0.19601284452769602, "grad_norm": 15.283175468444824, "learning_rate": 8.03987155472304e-07, "loss": 0.4226, "step": 1465 }, { "epoch": 0.19614664169119614, "grad_norm": 14.15552806854248, "learning_rate": 8.038533583088038e-07, "loss": 0.3402, "step": 1466 }, { "epoch": 0.1962804388546963, "grad_norm": 9.852507591247559, "learning_rate": 8.037195611453037e-07, "loss": 0.3158, "step": 1467 }, { "epoch": 0.1964142360181964, "grad_norm": 72.87781524658203, "learning_rate": 8.035857639818036e-07, "loss": 0.9129, "step": 1468 }, { "epoch": 0.19654803318169656, "grad_norm": 20.90854835510254, "learning_rate": 8.034519668183034e-07, "loss": 0.6185, "step": 1469 }, { "epoch": 0.19668183034519668, "grad_norm": 26.211544036865234, "learning_rate": 8.033181696548032e-07, "loss": 0.5332, "step": 1470 }, { "epoch": 0.19681562750869683, "grad_norm": 36.3130989074707, "learning_rate": 8.031843724913031e-07, "loss": 0.6873, "step": 1471 }, { "epoch": 0.19694942467219695, "grad_norm": 53.865318298339844, "learning_rate": 8.03050575327803e-07, "loss": 0.7602, "step": 1472 }, { "epoch": 0.19708322183569707, "grad_norm": 9.044698715209961, "learning_rate": 8.02916778164303e-07, "loss": 0.4861, "step": 1473 }, { "epoch": 0.19721701899919722, "grad_norm": 7.4868974685668945, "learning_rate": 8.027829810008027e-07, "loss": 0.4391, "step": 1474 }, { "epoch": 0.19735081616269734, "grad_norm": 16.310550689697266, "learning_rate": 8.026491838373026e-07, "loss": 0.428, "step": 1475 }, { "epoch": 0.1974846133261975, "grad_norm": 7.362921714782715, "learning_rate": 8.025153866738025e-07, "loss": 0.4354, "step": 1476 }, { "epoch": 0.1976184104896976, "grad_norm": 14.921101570129395, "learning_rate": 8.023815895103024e-07, "loss": 0.5267, "step": 1477 }, { "epoch": 0.19775220765319776, "grad_norm": 9.827252388000488, "learning_rate": 8.022477923468021e-07, "loss": 0.4744, "step": 1478 }, { "epoch": 0.19788600481669788, "grad_norm": 15.930474281311035, "learning_rate": 8.02113995183302e-07, "loss": 0.4542, "step": 1479 }, { "epoch": 0.19801980198019803, "grad_norm": 27.595083236694336, "learning_rate": 8.01980198019802e-07, "loss": 0.6262, "step": 1480 }, { "epoch": 0.19815359914369815, "grad_norm": 21.02247428894043, "learning_rate": 8.018464008563019e-07, "loss": 0.547, "step": 1481 }, { "epoch": 0.1982873963071983, "grad_norm": 36.99065017700195, "learning_rate": 8.017126036928017e-07, "loss": 0.5951, "step": 1482 }, { "epoch": 0.19842119347069842, "grad_norm": 35.75117492675781, "learning_rate": 8.015788065293015e-07, "loss": 0.5362, "step": 1483 }, { "epoch": 0.19855499063419854, "grad_norm": 26.42728614807129, "learning_rate": 8.014450093658014e-07, "loss": 0.5004, "step": 1484 }, { "epoch": 0.1986887877976987, "grad_norm": 40.682533264160156, "learning_rate": 8.013112122023013e-07, "loss": 0.5509, "step": 1485 }, { "epoch": 0.1988225849611988, "grad_norm": 14.49812126159668, "learning_rate": 8.011774150388011e-07, "loss": 0.4304, "step": 1486 }, { "epoch": 0.19895638212469896, "grad_norm": 9.288276672363281, "learning_rate": 8.010436178753011e-07, "loss": 0.502, "step": 1487 }, { "epoch": 0.19909017928819908, "grad_norm": 25.14592933654785, "learning_rate": 8.009098207118009e-07, "loss": 0.4496, "step": 1488 }, { "epoch": 0.19922397645169923, "grad_norm": 17.102027893066406, "learning_rate": 8.007760235483008e-07, "loss": 0.4037, "step": 1489 }, { "epoch": 0.19935777361519935, "grad_norm": 17.179101943969727, "learning_rate": 8.006422263848006e-07, "loss": 0.3381, "step": 1490 }, { "epoch": 0.1994915707786995, "grad_norm": 7.801584243774414, "learning_rate": 8.005084292213005e-07, "loss": 0.3936, "step": 1491 }, { "epoch": 0.19962536794219962, "grad_norm": 16.901185989379883, "learning_rate": 8.003746320578003e-07, "loss": 0.4965, "step": 1492 }, { "epoch": 0.19975916510569977, "grad_norm": 50.28840637207031, "learning_rate": 8.002408348943001e-07, "loss": 0.7711, "step": 1493 }, { "epoch": 0.1998929622691999, "grad_norm": 18.50911521911621, "learning_rate": 8.001070377308e-07, "loss": 0.563, "step": 1494 }, { "epoch": 0.2000267594327, "grad_norm": 13.578102111816406, "learning_rate": 7.999732405673e-07, "loss": 0.3868, "step": 1495 }, { "epoch": 0.20016055659620016, "grad_norm": 12.217427253723145, "learning_rate": 7.998394434037999e-07, "loss": 0.4292, "step": 1496 }, { "epoch": 0.20029435375970028, "grad_norm": 30.68413543701172, "learning_rate": 7.997056462402996e-07, "loss": 0.6278, "step": 1497 }, { "epoch": 0.20042815092320043, "grad_norm": 18.557640075683594, "learning_rate": 7.995718490767995e-07, "loss": 0.4114, "step": 1498 }, { "epoch": 0.20056194808670055, "grad_norm": 7.585826873779297, "learning_rate": 7.994380519132994e-07, "loss": 0.3617, "step": 1499 }, { "epoch": 0.2006957452502007, "grad_norm": 17.0360164642334, "learning_rate": 7.993042547497993e-07, "loss": 0.4363, "step": 1500 }, { "epoch": 0.20082954241370082, "grad_norm": 9.763684272766113, "learning_rate": 7.99170457586299e-07, "loss": 0.374, "step": 1501 }, { "epoch": 0.20096333957720097, "grad_norm": 13.77135944366455, "learning_rate": 7.99036660422799e-07, "loss": 0.5149, "step": 1502 }, { "epoch": 0.2010971367407011, "grad_norm": 14.308184623718262, "learning_rate": 7.989028632592989e-07, "loss": 0.548, "step": 1503 }, { "epoch": 0.20123093390420124, "grad_norm": 31.327220916748047, "learning_rate": 7.987690660957988e-07, "loss": 0.4381, "step": 1504 }, { "epoch": 0.20136473106770136, "grad_norm": 14.651300430297852, "learning_rate": 7.986352689322986e-07, "loss": 0.404, "step": 1505 }, { "epoch": 0.20149852823120148, "grad_norm": 36.3664436340332, "learning_rate": 7.985014717687984e-07, "loss": 0.6142, "step": 1506 }, { "epoch": 0.20163232539470163, "grad_norm": 11.882952690124512, "learning_rate": 7.983676746052983e-07, "loss": 0.653, "step": 1507 }, { "epoch": 0.20176612255820175, "grad_norm": 9.82363510131836, "learning_rate": 7.982338774417982e-07, "loss": 0.6363, "step": 1508 }, { "epoch": 0.2018999197217019, "grad_norm": 11.591914176940918, "learning_rate": 7.981000802782981e-07, "loss": 0.4282, "step": 1509 }, { "epoch": 0.20203371688520202, "grad_norm": 9.419525146484375, "learning_rate": 7.97966283114798e-07, "loss": 0.4525, "step": 1510 }, { "epoch": 0.20216751404870217, "grad_norm": 20.147947311401367, "learning_rate": 7.978324859512978e-07, "loss": 0.3658, "step": 1511 }, { "epoch": 0.2023013112122023, "grad_norm": 12.357690811157227, "learning_rate": 7.976986887877977e-07, "loss": 0.6424, "step": 1512 }, { "epoch": 0.20243510837570244, "grad_norm": 10.293766975402832, "learning_rate": 7.975648916242975e-07, "loss": 0.4457, "step": 1513 }, { "epoch": 0.20256890553920257, "grad_norm": 7.507662773132324, "learning_rate": 7.974310944607974e-07, "loss": 0.3679, "step": 1514 }, { "epoch": 0.20270270270270271, "grad_norm": 30.073741912841797, "learning_rate": 7.972972972972972e-07, "loss": 0.368, "step": 1515 }, { "epoch": 0.20283649986620284, "grad_norm": 20.951446533203125, "learning_rate": 7.97163500133797e-07, "loss": 0.361, "step": 1516 }, { "epoch": 0.20297029702970298, "grad_norm": 44.43687057495117, "learning_rate": 7.97029702970297e-07, "loss": 0.6732, "step": 1517 }, { "epoch": 0.2031040941932031, "grad_norm": 9.759699821472168, "learning_rate": 7.968959058067969e-07, "loss": 0.431, "step": 1518 }, { "epoch": 0.20323789135670323, "grad_norm": 30.1479434967041, "learning_rate": 7.967621086432968e-07, "loss": 0.7235, "step": 1519 }, { "epoch": 0.20337168852020338, "grad_norm": 31.84085464477539, "learning_rate": 7.966283114797965e-07, "loss": 0.3477, "step": 1520 }, { "epoch": 0.2035054856837035, "grad_norm": 13.055167198181152, "learning_rate": 7.964945143162964e-07, "loss": 0.4303, "step": 1521 }, { "epoch": 0.20363928284720365, "grad_norm": 14.279312133789062, "learning_rate": 7.963607171527963e-07, "loss": 0.3912, "step": 1522 }, { "epoch": 0.20377308001070377, "grad_norm": 14.692657470703125, "learning_rate": 7.962269199892962e-07, "loss": 0.6159, "step": 1523 }, { "epoch": 0.20390687717420392, "grad_norm": 11.743828773498535, "learning_rate": 7.960931228257961e-07, "loss": 0.3656, "step": 1524 }, { "epoch": 0.20404067433770404, "grad_norm": 35.73106384277344, "learning_rate": 7.959593256622959e-07, "loss": 0.4621, "step": 1525 }, { "epoch": 0.20417447150120419, "grad_norm": 20.684062957763672, "learning_rate": 7.958255284987958e-07, "loss": 0.4605, "step": 1526 }, { "epoch": 0.2043082686647043, "grad_norm": 24.006898880004883, "learning_rate": 7.956917313352957e-07, "loss": 0.5161, "step": 1527 }, { "epoch": 0.20444206582820446, "grad_norm": 17.655712127685547, "learning_rate": 7.955579341717955e-07, "loss": 0.551, "step": 1528 }, { "epoch": 0.20457586299170458, "grad_norm": 13.847597122192383, "learning_rate": 7.954241370082953e-07, "loss": 0.4893, "step": 1529 }, { "epoch": 0.2047096601552047, "grad_norm": 10.743647575378418, "learning_rate": 7.952903398447952e-07, "loss": 0.3289, "step": 1530 }, { "epoch": 0.20484345731870485, "grad_norm": 37.21485900878906, "learning_rate": 7.951565426812952e-07, "loss": 0.6831, "step": 1531 }, { "epoch": 0.20497725448220497, "grad_norm": 10.721757888793945, "learning_rate": 7.95022745517795e-07, "loss": 0.4162, "step": 1532 }, { "epoch": 0.20511105164570512, "grad_norm": 25.645244598388672, "learning_rate": 7.948889483542949e-07, "loss": 0.5841, "step": 1533 }, { "epoch": 0.20524484880920524, "grad_norm": 9.362749099731445, "learning_rate": 7.947551511907947e-07, "loss": 0.4784, "step": 1534 }, { "epoch": 0.2053786459727054, "grad_norm": 26.846572875976562, "learning_rate": 7.946213540272946e-07, "loss": 0.2672, "step": 1535 }, { "epoch": 0.2055124431362055, "grad_norm": 8.28139877319336, "learning_rate": 7.944875568637944e-07, "loss": 0.4793, "step": 1536 }, { "epoch": 0.20564624029970566, "grad_norm": 6.78912878036499, "learning_rate": 7.943537597002943e-07, "loss": 0.3549, "step": 1537 }, { "epoch": 0.20578003746320578, "grad_norm": 17.646512985229492, "learning_rate": 7.942199625367942e-07, "loss": 0.5807, "step": 1538 }, { "epoch": 0.20591383462670593, "grad_norm": 9.620989799499512, "learning_rate": 7.940861653732941e-07, "loss": 0.5181, "step": 1539 }, { "epoch": 0.20604763179020605, "grad_norm": 13.761813163757324, "learning_rate": 7.939523682097939e-07, "loss": 0.4058, "step": 1540 }, { "epoch": 0.20618142895370617, "grad_norm": 14.625779151916504, "learning_rate": 7.938185710462938e-07, "loss": 0.4773, "step": 1541 }, { "epoch": 0.20631522611720632, "grad_norm": 12.970795631408691, "learning_rate": 7.936847738827937e-07, "loss": 0.5613, "step": 1542 }, { "epoch": 0.20644902328070644, "grad_norm": 26.174768447875977, "learning_rate": 7.935509767192934e-07, "loss": 0.5737, "step": 1543 }, { "epoch": 0.2065828204442066, "grad_norm": 17.304841995239258, "learning_rate": 7.934171795557933e-07, "loss": 0.5512, "step": 1544 }, { "epoch": 0.2067166176077067, "grad_norm": 30.327293395996094, "learning_rate": 7.932833823922932e-07, "loss": 0.5466, "step": 1545 }, { "epoch": 0.20685041477120686, "grad_norm": 54.80016326904297, "learning_rate": 7.931495852287932e-07, "loss": 0.4838, "step": 1546 }, { "epoch": 0.20698421193470698, "grad_norm": 42.308292388916016, "learning_rate": 7.93015788065293e-07, "loss": 0.3779, "step": 1547 }, { "epoch": 0.20711800909820713, "grad_norm": 11.315117835998535, "learning_rate": 7.928819909017928e-07, "loss": 0.5158, "step": 1548 }, { "epoch": 0.20725180626170725, "grad_norm": 12.475515365600586, "learning_rate": 7.927481937382927e-07, "loss": 0.5424, "step": 1549 }, { "epoch": 0.2073856034252074, "grad_norm": 14.070937156677246, "learning_rate": 7.926143965747926e-07, "loss": 0.5486, "step": 1550 }, { "epoch": 0.20751940058870752, "grad_norm": 16.757047653198242, "learning_rate": 7.924805994112924e-07, "loss": 0.4727, "step": 1551 }, { "epoch": 0.20765319775220764, "grad_norm": 25.830524444580078, "learning_rate": 7.923468022477923e-07, "loss": 0.4864, "step": 1552 }, { "epoch": 0.2077869949157078, "grad_norm": 37.67808151245117, "learning_rate": 7.922130050842922e-07, "loss": 0.6128, "step": 1553 }, { "epoch": 0.2079207920792079, "grad_norm": 15.380407333374023, "learning_rate": 7.920792079207921e-07, "loss": 0.5285, "step": 1554 }, { "epoch": 0.20805458924270806, "grad_norm": 12.014679908752441, "learning_rate": 7.919454107572919e-07, "loss": 0.5706, "step": 1555 }, { "epoch": 0.20818838640620818, "grad_norm": 7.124541282653809, "learning_rate": 7.918116135937918e-07, "loss": 0.3468, "step": 1556 }, { "epoch": 0.20832218356970833, "grad_norm": 15.507020950317383, "learning_rate": 7.916778164302916e-07, "loss": 0.5682, "step": 1557 }, { "epoch": 0.20845598073320845, "grad_norm": 29.63962745666504, "learning_rate": 7.915440192667915e-07, "loss": 0.3039, "step": 1558 }, { "epoch": 0.2085897778967086, "grad_norm": 14.899747848510742, "learning_rate": 7.914102221032913e-07, "loss": 0.7749, "step": 1559 }, { "epoch": 0.20872357506020872, "grad_norm": 8.440888404846191, "learning_rate": 7.912764249397912e-07, "loss": 0.3634, "step": 1560 }, { "epoch": 0.20885737222370887, "grad_norm": 32.91565704345703, "learning_rate": 7.911426277762912e-07, "loss": 0.5854, "step": 1561 }, { "epoch": 0.208991169387209, "grad_norm": 13.117196083068848, "learning_rate": 7.91008830612791e-07, "loss": 0.5329, "step": 1562 }, { "epoch": 0.2091249665507091, "grad_norm": 26.43032455444336, "learning_rate": 7.908750334492908e-07, "loss": 0.4054, "step": 1563 }, { "epoch": 0.20925876371420926, "grad_norm": 24.705900192260742, "learning_rate": 7.907412362857907e-07, "loss": 0.5076, "step": 1564 }, { "epoch": 0.20939256087770938, "grad_norm": 12.502325057983398, "learning_rate": 7.906074391222906e-07, "loss": 0.4692, "step": 1565 }, { "epoch": 0.20952635804120953, "grad_norm": 11.091124534606934, "learning_rate": 7.904736419587905e-07, "loss": 0.3521, "step": 1566 }, { "epoch": 0.20966015520470965, "grad_norm": 9.684977531433105, "learning_rate": 7.903398447952902e-07, "loss": 0.5007, "step": 1567 }, { "epoch": 0.2097939523682098, "grad_norm": 11.329195976257324, "learning_rate": 7.902060476317902e-07, "loss": 0.3944, "step": 1568 }, { "epoch": 0.20992774953170992, "grad_norm": 9.475909233093262, "learning_rate": 7.900722504682901e-07, "loss": 0.4532, "step": 1569 }, { "epoch": 0.21006154669521007, "grad_norm": 29.603778839111328, "learning_rate": 7.899384533047899e-07, "loss": 0.3873, "step": 1570 }, { "epoch": 0.2101953438587102, "grad_norm": 16.4836483001709, "learning_rate": 7.898046561412897e-07, "loss": 0.511, "step": 1571 }, { "epoch": 0.21032914102221034, "grad_norm": 9.076254844665527, "learning_rate": 7.896708589777896e-07, "loss": 0.4034, "step": 1572 }, { "epoch": 0.21046293818571046, "grad_norm": 14.20362663269043, "learning_rate": 7.895370618142895e-07, "loss": 0.4189, "step": 1573 }, { "epoch": 0.21059673534921058, "grad_norm": 9.192584991455078, "learning_rate": 7.894032646507893e-07, "loss": 0.3308, "step": 1574 }, { "epoch": 0.21073053251271073, "grad_norm": 15.277337074279785, "learning_rate": 7.892694674872893e-07, "loss": 0.2967, "step": 1575 }, { "epoch": 0.21086432967621085, "grad_norm": 19.246171951293945, "learning_rate": 7.891356703237891e-07, "loss": 0.5471, "step": 1576 }, { "epoch": 0.210998126839711, "grad_norm": 19.531387329101562, "learning_rate": 7.89001873160289e-07, "loss": 0.6087, "step": 1577 }, { "epoch": 0.21113192400321112, "grad_norm": 20.86586570739746, "learning_rate": 7.888680759967888e-07, "loss": 0.7327, "step": 1578 }, { "epoch": 0.21126572116671127, "grad_norm": 11.660430908203125, "learning_rate": 7.887342788332887e-07, "loss": 0.3771, "step": 1579 }, { "epoch": 0.2113995183302114, "grad_norm": 19.148265838623047, "learning_rate": 7.886004816697886e-07, "loss": 0.5557, "step": 1580 }, { "epoch": 0.21153331549371154, "grad_norm": 23.61522674560547, "learning_rate": 7.884666845062884e-07, "loss": 0.4928, "step": 1581 }, { "epoch": 0.21166711265721166, "grad_norm": 14.679976463317871, "learning_rate": 7.883328873427882e-07, "loss": 0.2952, "step": 1582 }, { "epoch": 0.2118009098207118, "grad_norm": 10.19521427154541, "learning_rate": 7.881990901792882e-07, "loss": 0.5753, "step": 1583 }, { "epoch": 0.21193470698421193, "grad_norm": 12.961514472961426, "learning_rate": 7.880652930157881e-07, "loss": 0.622, "step": 1584 }, { "epoch": 0.21206850414771206, "grad_norm": 25.15813446044922, "learning_rate": 7.87931495852288e-07, "loss": 0.411, "step": 1585 }, { "epoch": 0.2122023013112122, "grad_norm": 9.127864837646484, "learning_rate": 7.877976986887877e-07, "loss": 0.5316, "step": 1586 }, { "epoch": 0.21233609847471233, "grad_norm": 7.281818389892578, "learning_rate": 7.876639015252876e-07, "loss": 0.4512, "step": 1587 }, { "epoch": 0.21246989563821247, "grad_norm": 30.903066635131836, "learning_rate": 7.875301043617875e-07, "loss": 0.3813, "step": 1588 }, { "epoch": 0.2126036928017126, "grad_norm": 45.03158950805664, "learning_rate": 7.873963071982874e-07, "loss": 0.598, "step": 1589 }, { "epoch": 0.21273748996521274, "grad_norm": 11.616840362548828, "learning_rate": 7.872625100347872e-07, "loss": 0.4565, "step": 1590 }, { "epoch": 0.21287128712871287, "grad_norm": 36.0461311340332, "learning_rate": 7.871287128712871e-07, "loss": 0.6083, "step": 1591 }, { "epoch": 0.21300508429221301, "grad_norm": 45.35133743286133, "learning_rate": 7.86994915707787e-07, "loss": 0.2956, "step": 1592 }, { "epoch": 0.21313888145571314, "grad_norm": 7.332738399505615, "learning_rate": 7.868611185442869e-07, "loss": 0.4375, "step": 1593 }, { "epoch": 0.21327267861921329, "grad_norm": 23.85276985168457, "learning_rate": 7.867273213807867e-07, "loss": 0.3407, "step": 1594 }, { "epoch": 0.2134064757827134, "grad_norm": 8.67752456665039, "learning_rate": 7.865935242172865e-07, "loss": 0.4542, "step": 1595 }, { "epoch": 0.21354027294621353, "grad_norm": 14.351076126098633, "learning_rate": 7.864597270537864e-07, "loss": 0.4298, "step": 1596 }, { "epoch": 0.21367407010971368, "grad_norm": 10.876717567443848, "learning_rate": 7.863259298902863e-07, "loss": 0.4249, "step": 1597 }, { "epoch": 0.2138078672732138, "grad_norm": 24.51763916015625, "learning_rate": 7.861921327267862e-07, "loss": 0.614, "step": 1598 }, { "epoch": 0.21394166443671395, "grad_norm": 7.922404766082764, "learning_rate": 7.86058335563286e-07, "loss": 0.3266, "step": 1599 }, { "epoch": 0.21407546160021407, "grad_norm": 19.39006996154785, "learning_rate": 7.859245383997859e-07, "loss": 0.3553, "step": 1600 }, { "epoch": 0.21420925876371422, "grad_norm": 52.06029510498047, "learning_rate": 7.857907412362857e-07, "loss": 0.7301, "step": 1601 }, { "epoch": 0.21434305592721434, "grad_norm": 13.548552513122559, "learning_rate": 7.856569440727856e-07, "loss": 0.4348, "step": 1602 }, { "epoch": 0.2144768530907145, "grad_norm": 24.992862701416016, "learning_rate": 7.855231469092855e-07, "loss": 0.4668, "step": 1603 }, { "epoch": 0.2146106502542146, "grad_norm": 22.891555786132812, "learning_rate": 7.853893497457853e-07, "loss": 0.5316, "step": 1604 }, { "epoch": 0.21474444741771476, "grad_norm": 30.759138107299805, "learning_rate": 7.852555525822852e-07, "loss": 0.7114, "step": 1605 }, { "epoch": 0.21487824458121488, "grad_norm": 13.2944974899292, "learning_rate": 7.851217554187851e-07, "loss": 0.3671, "step": 1606 }, { "epoch": 0.215012041744715, "grad_norm": 42.519676208496094, "learning_rate": 7.84987958255285e-07, "loss": 0.7422, "step": 1607 }, { "epoch": 0.21514583890821515, "grad_norm": 36.12497329711914, "learning_rate": 7.848541610917849e-07, "loss": 0.5744, "step": 1608 }, { "epoch": 0.21527963607171527, "grad_norm": 6.349627494812012, "learning_rate": 7.847203639282846e-07, "loss": 0.4184, "step": 1609 }, { "epoch": 0.21541343323521542, "grad_norm": 26.485004425048828, "learning_rate": 7.845865667647845e-07, "loss": 0.6496, "step": 1610 }, { "epoch": 0.21554723039871554, "grad_norm": 24.646305084228516, "learning_rate": 7.844527696012844e-07, "loss": 0.4734, "step": 1611 }, { "epoch": 0.2156810275622157, "grad_norm": 28.716405868530273, "learning_rate": 7.843189724377844e-07, "loss": 0.6598, "step": 1612 }, { "epoch": 0.2158148247257158, "grad_norm": 28.917312622070312, "learning_rate": 7.841851752742841e-07, "loss": 0.6375, "step": 1613 }, { "epoch": 0.21594862188921596, "grad_norm": 41.25215148925781, "learning_rate": 7.84051378110784e-07, "loss": 0.3129, "step": 1614 }, { "epoch": 0.21608241905271608, "grad_norm": 26.903348922729492, "learning_rate": 7.839175809472839e-07, "loss": 0.6096, "step": 1615 }, { "epoch": 0.21621621621621623, "grad_norm": 10.082923889160156, "learning_rate": 7.837837837837838e-07, "loss": 0.466, "step": 1616 }, { "epoch": 0.21635001337971635, "grad_norm": 13.9113187789917, "learning_rate": 7.836499866202836e-07, "loss": 0.4838, "step": 1617 }, { "epoch": 0.21648381054321647, "grad_norm": 38.25431442260742, "learning_rate": 7.835161894567834e-07, "loss": 0.4285, "step": 1618 }, { "epoch": 0.21661760770671662, "grad_norm": 30.31580352783203, "learning_rate": 7.833823922932834e-07, "loss": 0.5094, "step": 1619 }, { "epoch": 0.21675140487021674, "grad_norm": 42.48313522338867, "learning_rate": 7.832485951297833e-07, "loss": 0.6006, "step": 1620 }, { "epoch": 0.2168852020337169, "grad_norm": 35.96217727661133, "learning_rate": 7.831147979662831e-07, "loss": 0.5646, "step": 1621 }, { "epoch": 0.217018999197217, "grad_norm": 21.414064407348633, "learning_rate": 7.82981000802783e-07, "loss": 0.4919, "step": 1622 }, { "epoch": 0.21715279636071716, "grad_norm": 55.01673126220703, "learning_rate": 7.828472036392828e-07, "loss": 0.5298, "step": 1623 }, { "epoch": 0.21728659352421728, "grad_norm": 38.64816665649414, "learning_rate": 7.827134064757826e-07, "loss": 0.4703, "step": 1624 }, { "epoch": 0.21742039068771743, "grad_norm": 30.681922912597656, "learning_rate": 7.825796093122825e-07, "loss": 0.4881, "step": 1625 }, { "epoch": 0.21755418785121755, "grad_norm": 25.147274017333984, "learning_rate": 7.824458121487824e-07, "loss": 0.4967, "step": 1626 }, { "epoch": 0.2176879850147177, "grad_norm": 12.836283683776855, "learning_rate": 7.823120149852824e-07, "loss": 0.5243, "step": 1627 }, { "epoch": 0.21782178217821782, "grad_norm": 11.253424644470215, "learning_rate": 7.821782178217821e-07, "loss": 0.4384, "step": 1628 }, { "epoch": 0.21795557934171794, "grad_norm": 10.726760864257812, "learning_rate": 7.82044420658282e-07, "loss": 0.512, "step": 1629 }, { "epoch": 0.2180893765052181, "grad_norm": 9.625816345214844, "learning_rate": 7.819106234947819e-07, "loss": 0.4128, "step": 1630 }, { "epoch": 0.2182231736687182, "grad_norm": 9.765437126159668, "learning_rate": 7.817768263312818e-07, "loss": 0.3775, "step": 1631 }, { "epoch": 0.21835697083221836, "grad_norm": 11.599120140075684, "learning_rate": 7.816430291677815e-07, "loss": 0.2792, "step": 1632 }, { "epoch": 0.21849076799571848, "grad_norm": 31.291276931762695, "learning_rate": 7.815092320042814e-07, "loss": 0.5215, "step": 1633 }, { "epoch": 0.21862456515921863, "grad_norm": 8.916696548461914, "learning_rate": 7.813754348407814e-07, "loss": 0.3656, "step": 1634 }, { "epoch": 0.21875836232271875, "grad_norm": 16.55954360961914, "learning_rate": 7.812416376772813e-07, "loss": 0.3342, "step": 1635 }, { "epoch": 0.2188921594862189, "grad_norm": 8.990848541259766, "learning_rate": 7.811078405137811e-07, "loss": 0.2476, "step": 1636 }, { "epoch": 0.21902595664971902, "grad_norm": 14.918166160583496, "learning_rate": 7.809740433502809e-07, "loss": 0.3736, "step": 1637 }, { "epoch": 0.21915975381321917, "grad_norm": 28.8228816986084, "learning_rate": 7.808402461867808e-07, "loss": 0.4368, "step": 1638 }, { "epoch": 0.2192935509767193, "grad_norm": 64.87861633300781, "learning_rate": 7.807064490232807e-07, "loss": 0.9008, "step": 1639 }, { "epoch": 0.2194273481402194, "grad_norm": 13.808755874633789, "learning_rate": 7.805726518597805e-07, "loss": 0.4378, "step": 1640 }, { "epoch": 0.21956114530371956, "grad_norm": 16.229251861572266, "learning_rate": 7.804388546962804e-07, "loss": 0.2789, "step": 1641 }, { "epoch": 0.21969494246721968, "grad_norm": 38.374961853027344, "learning_rate": 7.803050575327803e-07, "loss": 0.7807, "step": 1642 }, { "epoch": 0.21982873963071983, "grad_norm": 14.594093322753906, "learning_rate": 7.801712603692802e-07, "loss": 0.3078, "step": 1643 }, { "epoch": 0.21996253679421995, "grad_norm": 22.80158042907715, "learning_rate": 7.8003746320578e-07, "loss": 0.5571, "step": 1644 }, { "epoch": 0.2200963339577201, "grad_norm": 15.041318893432617, "learning_rate": 7.799036660422799e-07, "loss": 0.4072, "step": 1645 }, { "epoch": 0.22023013112122022, "grad_norm": 10.633419036865234, "learning_rate": 7.797698688787797e-07, "loss": 0.3541, "step": 1646 }, { "epoch": 0.22036392828472037, "grad_norm": 12.298516273498535, "learning_rate": 7.796360717152795e-07, "loss": 0.5253, "step": 1647 }, { "epoch": 0.2204977254482205, "grad_norm": 16.395235061645508, "learning_rate": 7.795022745517794e-07, "loss": 0.413, "step": 1648 }, { "epoch": 0.22063152261172064, "grad_norm": 16.762989044189453, "learning_rate": 7.793684773882794e-07, "loss": 0.596, "step": 1649 }, { "epoch": 0.22076531977522076, "grad_norm": 19.918655395507812, "learning_rate": 7.792346802247793e-07, "loss": 0.549, "step": 1650 }, { "epoch": 0.22089911693872089, "grad_norm": 44.29096603393555, "learning_rate": 7.79100883061279e-07, "loss": 0.4566, "step": 1651 }, { "epoch": 0.22103291410222103, "grad_norm": 11.867606163024902, "learning_rate": 7.789670858977789e-07, "loss": 0.353, "step": 1652 }, { "epoch": 0.22116671126572116, "grad_norm": 28.363037109375, "learning_rate": 7.788332887342788e-07, "loss": 0.3155, "step": 1653 }, { "epoch": 0.2213005084292213, "grad_norm": 13.975052833557129, "learning_rate": 7.786994915707787e-07, "loss": 0.4804, "step": 1654 }, { "epoch": 0.22143430559272143, "grad_norm": 10.488882064819336, "learning_rate": 7.785656944072784e-07, "loss": 0.4529, "step": 1655 }, { "epoch": 0.22156810275622157, "grad_norm": 49.90967559814453, "learning_rate": 7.784318972437784e-07, "loss": 0.4371, "step": 1656 }, { "epoch": 0.2217018999197217, "grad_norm": 20.764930725097656, "learning_rate": 7.782981000802783e-07, "loss": 0.6343, "step": 1657 }, { "epoch": 0.22183569708322184, "grad_norm": 9.306849479675293, "learning_rate": 7.781643029167782e-07, "loss": 0.2415, "step": 1658 }, { "epoch": 0.22196949424672197, "grad_norm": 13.599205017089844, "learning_rate": 7.78030505753278e-07, "loss": 0.4235, "step": 1659 }, { "epoch": 0.22210329141022211, "grad_norm": 15.648188591003418, "learning_rate": 7.778967085897778e-07, "loss": 0.3388, "step": 1660 }, { "epoch": 0.22223708857372224, "grad_norm": 15.08504867553711, "learning_rate": 7.777629114262777e-07, "loss": 0.5483, "step": 1661 }, { "epoch": 0.22237088573722238, "grad_norm": 15.75539779663086, "learning_rate": 7.776291142627776e-07, "loss": 0.434, "step": 1662 }, { "epoch": 0.2225046829007225, "grad_norm": 15.622505187988281, "learning_rate": 7.774953170992775e-07, "loss": 0.5991, "step": 1663 }, { "epoch": 0.22263848006422263, "grad_norm": 9.606407165527344, "learning_rate": 7.773615199357774e-07, "loss": 0.4849, "step": 1664 }, { "epoch": 0.22277227722772278, "grad_norm": 23.125944137573242, "learning_rate": 7.772277227722772e-07, "loss": 0.4658, "step": 1665 }, { "epoch": 0.2229060743912229, "grad_norm": 49.6617431640625, "learning_rate": 7.770939256087771e-07, "loss": 0.7905, "step": 1666 }, { "epoch": 0.22303987155472305, "grad_norm": 30.781614303588867, "learning_rate": 7.769601284452769e-07, "loss": 0.7366, "step": 1667 }, { "epoch": 0.22317366871822317, "grad_norm": 12.250410079956055, "learning_rate": 7.768263312817768e-07, "loss": 0.4096, "step": 1668 }, { "epoch": 0.22330746588172332, "grad_norm": 14.61763858795166, "learning_rate": 7.766925341182766e-07, "loss": 0.451, "step": 1669 }, { "epoch": 0.22344126304522344, "grad_norm": 8.47390365600586, "learning_rate": 7.765587369547765e-07, "loss": 0.4167, "step": 1670 }, { "epoch": 0.22357506020872359, "grad_norm": 14.591095924377441, "learning_rate": 7.764249397912764e-07, "loss": 0.4434, "step": 1671 }, { "epoch": 0.2237088573722237, "grad_norm": 20.27356719970703, "learning_rate": 7.762911426277763e-07, "loss": 0.6787, "step": 1672 }, { "epoch": 0.22384265453572386, "grad_norm": 23.21569061279297, "learning_rate": 7.761573454642762e-07, "loss": 0.4365, "step": 1673 }, { "epoch": 0.22397645169922398, "grad_norm": 15.145547866821289, "learning_rate": 7.760235483007759e-07, "loss": 0.4795, "step": 1674 }, { "epoch": 0.2241102488627241, "grad_norm": 12.153031349182129, "learning_rate": 7.758897511372758e-07, "loss": 0.4591, "step": 1675 }, { "epoch": 0.22424404602622425, "grad_norm": 24.258119583129883, "learning_rate": 7.757559539737757e-07, "loss": 0.539, "step": 1676 }, { "epoch": 0.22437784318972437, "grad_norm": 14.030094146728516, "learning_rate": 7.756221568102756e-07, "loss": 0.5219, "step": 1677 }, { "epoch": 0.22451164035322452, "grad_norm": 15.001423835754395, "learning_rate": 7.754883596467755e-07, "loss": 0.5467, "step": 1678 }, { "epoch": 0.22464543751672464, "grad_norm": 26.00774574279785, "learning_rate": 7.753545624832753e-07, "loss": 0.4102, "step": 1679 }, { "epoch": 0.2247792346802248, "grad_norm": 28.431720733642578, "learning_rate": 7.752207653197752e-07, "loss": 0.6003, "step": 1680 }, { "epoch": 0.2249130318437249, "grad_norm": 32.44437026977539, "learning_rate": 7.750869681562751e-07, "loss": 0.4752, "step": 1681 }, { "epoch": 0.22504682900722506, "grad_norm": 13.274910926818848, "learning_rate": 7.749531709927749e-07, "loss": 0.3531, "step": 1682 }, { "epoch": 0.22518062617072518, "grad_norm": 13.932464599609375, "learning_rate": 7.748193738292747e-07, "loss": 0.3837, "step": 1683 }, { "epoch": 0.22531442333422533, "grad_norm": 26.985837936401367, "learning_rate": 7.746855766657746e-07, "loss": 0.5733, "step": 1684 }, { "epoch": 0.22544822049772545, "grad_norm": 14.97724437713623, "learning_rate": 7.745517795022746e-07, "loss": 0.5379, "step": 1685 }, { "epoch": 0.22558201766122557, "grad_norm": 30.095355987548828, "learning_rate": 7.744179823387744e-07, "loss": 0.275, "step": 1686 }, { "epoch": 0.22571581482472572, "grad_norm": 11.657444953918457, "learning_rate": 7.742841851752743e-07, "loss": 0.501, "step": 1687 }, { "epoch": 0.22584961198822584, "grad_norm": 41.8042106628418, "learning_rate": 7.741503880117741e-07, "loss": 0.4567, "step": 1688 }, { "epoch": 0.225983409151726, "grad_norm": 12.690722465515137, "learning_rate": 7.74016590848274e-07, "loss": 0.4447, "step": 1689 }, { "epoch": 0.2261172063152261, "grad_norm": 41.832550048828125, "learning_rate": 7.738827936847738e-07, "loss": 0.686, "step": 1690 }, { "epoch": 0.22625100347872626, "grad_norm": 33.04152297973633, "learning_rate": 7.737489965212737e-07, "loss": 0.4873, "step": 1691 }, { "epoch": 0.22638480064222638, "grad_norm": 9.902626037597656, "learning_rate": 7.736151993577736e-07, "loss": 0.3976, "step": 1692 }, { "epoch": 0.22651859780572653, "grad_norm": 10.623470306396484, "learning_rate": 7.734814021942735e-07, "loss": 0.5017, "step": 1693 }, { "epoch": 0.22665239496922665, "grad_norm": 19.36290740966797, "learning_rate": 7.733476050307733e-07, "loss": 0.5208, "step": 1694 }, { "epoch": 0.2267861921327268, "grad_norm": 21.16316795349121, "learning_rate": 7.732138078672732e-07, "loss": 0.4946, "step": 1695 }, { "epoch": 0.22691998929622692, "grad_norm": 7.6001410484313965, "learning_rate": 7.730800107037731e-07, "loss": 0.2771, "step": 1696 }, { "epoch": 0.22705378645972704, "grad_norm": 21.064130783081055, "learning_rate": 7.72946213540273e-07, "loss": 0.5631, "step": 1697 }, { "epoch": 0.2271875836232272, "grad_norm": 27.93939781188965, "learning_rate": 7.728124163767727e-07, "loss": 0.5639, "step": 1698 }, { "epoch": 0.2273213807867273, "grad_norm": 20.972864151000977, "learning_rate": 7.726786192132726e-07, "loss": 0.5056, "step": 1699 }, { "epoch": 0.22745517795022746, "grad_norm": 20.66187286376953, "learning_rate": 7.725448220497726e-07, "loss": 0.383, "step": 1700 }, { "epoch": 0.22758897511372758, "grad_norm": 40.54756164550781, "learning_rate": 7.724110248862724e-07, "loss": 0.587, "step": 1701 }, { "epoch": 0.22772277227722773, "grad_norm": 18.96567153930664, "learning_rate": 7.722772277227722e-07, "loss": 0.1255, "step": 1702 }, { "epoch": 0.22785656944072785, "grad_norm": 14.205692291259766, "learning_rate": 7.721434305592721e-07, "loss": 0.423, "step": 1703 }, { "epoch": 0.227990366604228, "grad_norm": 28.481826782226562, "learning_rate": 7.72009633395772e-07, "loss": 0.5583, "step": 1704 }, { "epoch": 0.22812416376772812, "grad_norm": 12.118751525878906, "learning_rate": 7.718758362322718e-07, "loss": 0.3706, "step": 1705 }, { "epoch": 0.22825796093122827, "grad_norm": 32.965789794921875, "learning_rate": 7.717420390687716e-07, "loss": 0.478, "step": 1706 }, { "epoch": 0.2283917580947284, "grad_norm": 16.470703125, "learning_rate": 7.716082419052716e-07, "loss": 0.4031, "step": 1707 }, { "epoch": 0.2285255552582285, "grad_norm": 8.866830825805664, "learning_rate": 7.714744447417715e-07, "loss": 0.3555, "step": 1708 }, { "epoch": 0.22865935242172866, "grad_norm": 19.230566024780273, "learning_rate": 7.713406475782713e-07, "loss": 0.4796, "step": 1709 }, { "epoch": 0.22879314958522878, "grad_norm": 10.708561897277832, "learning_rate": 7.712068504147712e-07, "loss": 0.5068, "step": 1710 }, { "epoch": 0.22892694674872893, "grad_norm": 22.769411087036133, "learning_rate": 7.71073053251271e-07, "loss": 0.5456, "step": 1711 }, { "epoch": 0.22906074391222905, "grad_norm": 10.31895923614502, "learning_rate": 7.709392560877709e-07, "loss": 0.3564, "step": 1712 }, { "epoch": 0.2291945410757292, "grad_norm": 17.751842498779297, "learning_rate": 7.708054589242707e-07, "loss": 0.5264, "step": 1713 }, { "epoch": 0.22932833823922932, "grad_norm": 14.206411361694336, "learning_rate": 7.706716617607706e-07, "loss": 0.3249, "step": 1714 }, { "epoch": 0.22946213540272947, "grad_norm": 17.624164581298828, "learning_rate": 7.705378645972706e-07, "loss": 0.2871, "step": 1715 }, { "epoch": 0.2295959325662296, "grad_norm": 16.306983947753906, "learning_rate": 7.704040674337704e-07, "loss": 0.4017, "step": 1716 }, { "epoch": 0.22972972972972974, "grad_norm": 29.741806030273438, "learning_rate": 7.702702702702702e-07, "loss": 0.7084, "step": 1717 }, { "epoch": 0.22986352689322986, "grad_norm": 11.190682411193848, "learning_rate": 7.701364731067701e-07, "loss": 0.3735, "step": 1718 }, { "epoch": 0.22999732405672998, "grad_norm": 12.770589828491211, "learning_rate": 7.7000267594327e-07, "loss": 0.3144, "step": 1719 }, { "epoch": 0.23013112122023013, "grad_norm": 10.326930046081543, "learning_rate": 7.698688787797699e-07, "loss": 0.439, "step": 1720 }, { "epoch": 0.23026491838373025, "grad_norm": 17.448877334594727, "learning_rate": 7.697350816162696e-07, "loss": 0.3749, "step": 1721 }, { "epoch": 0.2303987155472304, "grad_norm": 19.173755645751953, "learning_rate": 7.696012844527696e-07, "loss": 0.5787, "step": 1722 }, { "epoch": 0.23053251271073052, "grad_norm": 11.519134521484375, "learning_rate": 7.694674872892695e-07, "loss": 0.3439, "step": 1723 }, { "epoch": 0.23066630987423067, "grad_norm": 16.70917510986328, "learning_rate": 7.693336901257694e-07, "loss": 0.4379, "step": 1724 }, { "epoch": 0.2308001070377308, "grad_norm": 20.442516326904297, "learning_rate": 7.691998929622691e-07, "loss": 0.5623, "step": 1725 }, { "epoch": 0.23093390420123094, "grad_norm": 15.832517623901367, "learning_rate": 7.69066095798769e-07, "loss": 0.3917, "step": 1726 }, { "epoch": 0.23106770136473107, "grad_norm": 31.647958755493164, "learning_rate": 7.689322986352689e-07, "loss": 0.2497, "step": 1727 }, { "epoch": 0.23120149852823121, "grad_norm": 35.38355255126953, "learning_rate": 7.687985014717687e-07, "loss": 0.7143, "step": 1728 }, { "epoch": 0.23133529569173134, "grad_norm": 16.69539451599121, "learning_rate": 7.686647043082686e-07, "loss": 0.4129, "step": 1729 }, { "epoch": 0.23146909285523146, "grad_norm": 24.1192684173584, "learning_rate": 7.685309071447685e-07, "loss": 0.5691, "step": 1730 }, { "epoch": 0.2316028900187316, "grad_norm": 33.59052658081055, "learning_rate": 7.683971099812684e-07, "loss": 0.2748, "step": 1731 }, { "epoch": 0.23173668718223173, "grad_norm": 20.748188018798828, "learning_rate": 7.682633128177682e-07, "loss": 0.4968, "step": 1732 }, { "epoch": 0.23187048434573188, "grad_norm": 30.881195068359375, "learning_rate": 7.681295156542681e-07, "loss": 0.5891, "step": 1733 }, { "epoch": 0.232004281509232, "grad_norm": 12.7811279296875, "learning_rate": 7.67995718490768e-07, "loss": 0.4135, "step": 1734 }, { "epoch": 0.23213807867273215, "grad_norm": 36.71193313598633, "learning_rate": 7.678619213272678e-07, "loss": 0.2379, "step": 1735 }, { "epoch": 0.23227187583623227, "grad_norm": 25.562170028686523, "learning_rate": 7.677281241637676e-07, "loss": 0.743, "step": 1736 }, { "epoch": 0.23240567299973242, "grad_norm": 17.076271057128906, "learning_rate": 7.675943270002676e-07, "loss": 0.5704, "step": 1737 }, { "epoch": 0.23253947016323254, "grad_norm": 17.610042572021484, "learning_rate": 7.674605298367675e-07, "loss": 0.4055, "step": 1738 }, { "epoch": 0.23267326732673269, "grad_norm": 24.19269371032715, "learning_rate": 7.673267326732673e-07, "loss": 0.7589, "step": 1739 }, { "epoch": 0.2328070644902328, "grad_norm": 21.15789031982422, "learning_rate": 7.671929355097671e-07, "loss": 0.5307, "step": 1740 }, { "epoch": 0.23294086165373293, "grad_norm": 24.994171142578125, "learning_rate": 7.67059138346267e-07, "loss": 0.5847, "step": 1741 }, { "epoch": 0.23307465881723308, "grad_norm": 12.79098129272461, "learning_rate": 7.669253411827669e-07, "loss": 0.5421, "step": 1742 }, { "epoch": 0.2332084559807332, "grad_norm": 27.28856658935547, "learning_rate": 7.667915440192668e-07, "loss": 0.5737, "step": 1743 }, { "epoch": 0.23334225314423335, "grad_norm": 22.90030288696289, "learning_rate": 7.666577468557666e-07, "loss": 0.404, "step": 1744 }, { "epoch": 0.23347605030773347, "grad_norm": 18.861509323120117, "learning_rate": 7.665239496922665e-07, "loss": 0.6106, "step": 1745 }, { "epoch": 0.23360984747123362, "grad_norm": 7.453225135803223, "learning_rate": 7.663901525287664e-07, "loss": 0.4676, "step": 1746 }, { "epoch": 0.23374364463473374, "grad_norm": 22.557432174682617, "learning_rate": 7.662563553652663e-07, "loss": 0.4048, "step": 1747 }, { "epoch": 0.2338774417982339, "grad_norm": 9.977246284484863, "learning_rate": 7.66122558201766e-07, "loss": 0.4345, "step": 1748 }, { "epoch": 0.234011238961734, "grad_norm": 14.00601577758789, "learning_rate": 7.659887610382659e-07, "loss": 0.4331, "step": 1749 }, { "epoch": 0.23414503612523416, "grad_norm": 8.775120735168457, "learning_rate": 7.658549638747658e-07, "loss": 0.4908, "step": 1750 }, { "epoch": 0.23427883328873428, "grad_norm": 10.84395694732666, "learning_rate": 7.657211667112656e-07, "loss": 0.4164, "step": 1751 }, { "epoch": 0.2344126304522344, "grad_norm": 19.19474983215332, "learning_rate": 7.655873695477656e-07, "loss": 0.5403, "step": 1752 }, { "epoch": 0.23454642761573455, "grad_norm": 41.35846710205078, "learning_rate": 7.654535723842654e-07, "loss": 0.388, "step": 1753 }, { "epoch": 0.23468022477923467, "grad_norm": 8.804964065551758, "learning_rate": 7.653197752207653e-07, "loss": 0.4545, "step": 1754 }, { "epoch": 0.23481402194273482, "grad_norm": 12.223099708557129, "learning_rate": 7.651859780572651e-07, "loss": 0.5726, "step": 1755 }, { "epoch": 0.23494781910623494, "grad_norm": 14.482186317443848, "learning_rate": 7.65052180893765e-07, "loss": 0.4723, "step": 1756 }, { "epoch": 0.2350816162697351, "grad_norm": 10.057260513305664, "learning_rate": 7.649183837302649e-07, "loss": 0.4513, "step": 1757 }, { "epoch": 0.2352154134332352, "grad_norm": 12.00289535522461, "learning_rate": 7.647845865667647e-07, "loss": 0.5247, "step": 1758 }, { "epoch": 0.23534921059673536, "grad_norm": 31.00125503540039, "learning_rate": 7.646507894032646e-07, "loss": 0.3847, "step": 1759 }, { "epoch": 0.23548300776023548, "grad_norm": 16.481998443603516, "learning_rate": 7.645169922397645e-07, "loss": 0.6045, "step": 1760 }, { "epoch": 0.23561680492373563, "grad_norm": 8.05232048034668, "learning_rate": 7.643831950762644e-07, "loss": 0.4808, "step": 1761 }, { "epoch": 0.23575060208723575, "grad_norm": 9.497845649719238, "learning_rate": 7.642493979127643e-07, "loss": 0.3893, "step": 1762 }, { "epoch": 0.23588439925073587, "grad_norm": 9.914795875549316, "learning_rate": 7.64115600749264e-07, "loss": 0.3728, "step": 1763 }, { "epoch": 0.23601819641423602, "grad_norm": 33.589542388916016, "learning_rate": 7.639818035857639e-07, "loss": 0.3088, "step": 1764 }, { "epoch": 0.23615199357773614, "grad_norm": 45.17683410644531, "learning_rate": 7.638480064222638e-07, "loss": 0.6954, "step": 1765 }, { "epoch": 0.2362857907412363, "grad_norm": 16.858036041259766, "learning_rate": 7.637142092587638e-07, "loss": 0.4758, "step": 1766 }, { "epoch": 0.2364195879047364, "grad_norm": 12.273127555847168, "learning_rate": 7.635804120952635e-07, "loss": 0.4177, "step": 1767 }, { "epoch": 0.23655338506823656, "grad_norm": 6.832027912139893, "learning_rate": 7.634466149317634e-07, "loss": 0.3452, "step": 1768 }, { "epoch": 0.23668718223173668, "grad_norm": 18.53720474243164, "learning_rate": 7.633128177682633e-07, "loss": 0.3882, "step": 1769 }, { "epoch": 0.23682097939523683, "grad_norm": 34.819969177246094, "learning_rate": 7.631790206047632e-07, "loss": 0.6332, "step": 1770 }, { "epoch": 0.23695477655873695, "grad_norm": 15.0814790725708, "learning_rate": 7.63045223441263e-07, "loss": 0.6001, "step": 1771 }, { "epoch": 0.2370885737222371, "grad_norm": 42.27450180053711, "learning_rate": 7.629114262777628e-07, "loss": 0.5906, "step": 1772 }, { "epoch": 0.23722237088573722, "grad_norm": 37.23484802246094, "learning_rate": 7.627776291142627e-07, "loss": 0.5677, "step": 1773 }, { "epoch": 0.23735616804923734, "grad_norm": 22.933401107788086, "learning_rate": 7.626438319507627e-07, "loss": 0.4109, "step": 1774 }, { "epoch": 0.2374899652127375, "grad_norm": 29.32077407836914, "learning_rate": 7.625100347872625e-07, "loss": 0.4339, "step": 1775 }, { "epoch": 0.2376237623762376, "grad_norm": 12.448058128356934, "learning_rate": 7.623762376237624e-07, "loss": 0.5059, "step": 1776 }, { "epoch": 0.23775755953973776, "grad_norm": 30.502391815185547, "learning_rate": 7.622424404602622e-07, "loss": 0.3917, "step": 1777 }, { "epoch": 0.23789135670323788, "grad_norm": 11.772991180419922, "learning_rate": 7.62108643296762e-07, "loss": 0.4861, "step": 1778 }, { "epoch": 0.23802515386673803, "grad_norm": 16.44026756286621, "learning_rate": 7.619748461332619e-07, "loss": 0.5447, "step": 1779 }, { "epoch": 0.23815895103023815, "grad_norm": 22.339059829711914, "learning_rate": 7.618410489697618e-07, "loss": 0.4403, "step": 1780 }, { "epoch": 0.2382927481937383, "grad_norm": 21.03456687927246, "learning_rate": 7.617072518062617e-07, "loss": 0.5656, "step": 1781 }, { "epoch": 0.23842654535723842, "grad_norm": 13.859262466430664, "learning_rate": 7.615734546427615e-07, "loss": 0.6582, "step": 1782 }, { "epoch": 0.23856034252073857, "grad_norm": 14.496819496154785, "learning_rate": 7.614396574792614e-07, "loss": 0.6121, "step": 1783 }, { "epoch": 0.2386941396842387, "grad_norm": 10.3195161819458, "learning_rate": 7.613058603157613e-07, "loss": 0.3835, "step": 1784 }, { "epoch": 0.23882793684773881, "grad_norm": 21.508203506469727, "learning_rate": 7.611720631522612e-07, "loss": 0.299, "step": 1785 }, { "epoch": 0.23896173401123896, "grad_norm": 15.2034273147583, "learning_rate": 7.610382659887609e-07, "loss": 0.5355, "step": 1786 }, { "epoch": 0.23909553117473908, "grad_norm": 30.048208236694336, "learning_rate": 7.609044688252608e-07, "loss": 0.6428, "step": 1787 }, { "epoch": 0.23922932833823923, "grad_norm": 21.56700325012207, "learning_rate": 7.607706716617608e-07, "loss": 0.5884, "step": 1788 }, { "epoch": 0.23936312550173935, "grad_norm": 9.344924926757812, "learning_rate": 7.606368744982607e-07, "loss": 0.4136, "step": 1789 }, { "epoch": 0.2394969226652395, "grad_norm": 11.019752502441406, "learning_rate": 7.605030773347604e-07, "loss": 0.4007, "step": 1790 }, { "epoch": 0.23963071982873962, "grad_norm": 10.09147834777832, "learning_rate": 7.603692801712603e-07, "loss": 0.2681, "step": 1791 }, { "epoch": 0.23976451699223977, "grad_norm": 35.4040641784668, "learning_rate": 7.602354830077602e-07, "loss": 0.5687, "step": 1792 }, { "epoch": 0.2398983141557399, "grad_norm": 21.989391326904297, "learning_rate": 7.601016858442601e-07, "loss": 0.5918, "step": 1793 }, { "epoch": 0.24003211131924004, "grad_norm": 28.012353897094727, "learning_rate": 7.599678886807599e-07, "loss": 0.4057, "step": 1794 }, { "epoch": 0.24016590848274016, "grad_norm": 21.97614097595215, "learning_rate": 7.598340915172597e-07, "loss": 0.3357, "step": 1795 }, { "epoch": 0.24029970564624029, "grad_norm": 34.98719024658203, "learning_rate": 7.597002943537597e-07, "loss": 0.7191, "step": 1796 }, { "epoch": 0.24043350280974043, "grad_norm": 41.12904739379883, "learning_rate": 7.595664971902596e-07, "loss": 0.5203, "step": 1797 }, { "epoch": 0.24056729997324056, "grad_norm": 28.22544288635254, "learning_rate": 7.594327000267594e-07, "loss": 0.4968, "step": 1798 }, { "epoch": 0.2407010971367407, "grad_norm": 9.592473983764648, "learning_rate": 7.592989028632593e-07, "loss": 0.4003, "step": 1799 }, { "epoch": 0.24083489430024083, "grad_norm": 11.630552291870117, "learning_rate": 7.591651056997591e-07, "loss": 0.5112, "step": 1800 }, { "epoch": 0.24096869146374097, "grad_norm": 38.14583206176758, "learning_rate": 7.59031308536259e-07, "loss": 0.4954, "step": 1801 }, { "epoch": 0.2411024886272411, "grad_norm": 11.270364761352539, "learning_rate": 7.588975113727588e-07, "loss": 0.4722, "step": 1802 }, { "epoch": 0.24123628579074125, "grad_norm": 26.107135772705078, "learning_rate": 7.587637142092588e-07, "loss": 0.4823, "step": 1803 }, { "epoch": 0.24137008295424137, "grad_norm": 12.3712797164917, "learning_rate": 7.586299170457587e-07, "loss": 0.328, "step": 1804 }, { "epoch": 0.24150388011774152, "grad_norm": 22.7343692779541, "learning_rate": 7.584961198822584e-07, "loss": 0.2264, "step": 1805 }, { "epoch": 0.24163767728124164, "grad_norm": 9.504876136779785, "learning_rate": 7.583623227187583e-07, "loss": 0.5175, "step": 1806 }, { "epoch": 0.24177147444474179, "grad_norm": 13.001171112060547, "learning_rate": 7.582285255552582e-07, "loss": 0.62, "step": 1807 }, { "epoch": 0.2419052716082419, "grad_norm": 7.7516093254089355, "learning_rate": 7.580947283917581e-07, "loss": 0.4315, "step": 1808 }, { "epoch": 0.24203906877174203, "grad_norm": 9.052969932556152, "learning_rate": 7.579609312282578e-07, "loss": 0.2396, "step": 1809 }, { "epoch": 0.24217286593524218, "grad_norm": 35.019344329833984, "learning_rate": 7.578271340647578e-07, "loss": 0.5091, "step": 1810 }, { "epoch": 0.2423066630987423, "grad_norm": 34.047218322753906, "learning_rate": 7.576933369012577e-07, "loss": 0.4872, "step": 1811 }, { "epoch": 0.24244046026224245, "grad_norm": 11.145223617553711, "learning_rate": 7.575595397377576e-07, "loss": 0.4317, "step": 1812 }, { "epoch": 0.24257425742574257, "grad_norm": 12.527105331420898, "learning_rate": 7.574257425742574e-07, "loss": 0.3317, "step": 1813 }, { "epoch": 0.24270805458924272, "grad_norm": 14.338849067687988, "learning_rate": 7.572919454107572e-07, "loss": 0.2464, "step": 1814 }, { "epoch": 0.24284185175274284, "grad_norm": 35.2110710144043, "learning_rate": 7.571581482472571e-07, "loss": 0.5758, "step": 1815 }, { "epoch": 0.242975648916243, "grad_norm": 23.43071174621582, "learning_rate": 7.57024351083757e-07, "loss": 0.4348, "step": 1816 }, { "epoch": 0.2431094460797431, "grad_norm": 15.457377433776855, "learning_rate": 7.568905539202568e-07, "loss": 0.5457, "step": 1817 }, { "epoch": 0.24324324324324326, "grad_norm": 7.341452598571777, "learning_rate": 7.567567567567568e-07, "loss": 0.2265, "step": 1818 }, { "epoch": 0.24337704040674338, "grad_norm": 31.921831130981445, "learning_rate": 7.566229595932566e-07, "loss": 0.4919, "step": 1819 }, { "epoch": 0.2435108375702435, "grad_norm": 15.532947540283203, "learning_rate": 7.564891624297565e-07, "loss": 0.4056, "step": 1820 }, { "epoch": 0.24364463473374365, "grad_norm": 8.85003662109375, "learning_rate": 7.563553652662563e-07, "loss": 0.2963, "step": 1821 }, { "epoch": 0.24377843189724377, "grad_norm": 15.121420860290527, "learning_rate": 7.562215681027562e-07, "loss": 0.335, "step": 1822 }, { "epoch": 0.24391222906074392, "grad_norm": 12.752775192260742, "learning_rate": 7.56087770939256e-07, "loss": 0.4942, "step": 1823 }, { "epoch": 0.24404602622424404, "grad_norm": 11.403348922729492, "learning_rate": 7.559539737757559e-07, "loss": 0.3064, "step": 1824 }, { "epoch": 0.2441798233877442, "grad_norm": 16.34742546081543, "learning_rate": 7.558201766122558e-07, "loss": 0.5005, "step": 1825 }, { "epoch": 0.2443136205512443, "grad_norm": 18.730186462402344, "learning_rate": 7.556863794487557e-07, "loss": 0.4153, "step": 1826 }, { "epoch": 0.24444741771474446, "grad_norm": 14.484100341796875, "learning_rate": 7.555525822852556e-07, "loss": 0.3823, "step": 1827 }, { "epoch": 0.24458121487824458, "grad_norm": 39.21298599243164, "learning_rate": 7.554187851217554e-07, "loss": 0.8328, "step": 1828 }, { "epoch": 0.24471501204174473, "grad_norm": 10.856181144714355, "learning_rate": 7.552849879582552e-07, "loss": 0.3408, "step": 1829 }, { "epoch": 0.24484880920524485, "grad_norm": 22.42622947692871, "learning_rate": 7.551511907947551e-07, "loss": 0.2602, "step": 1830 }, { "epoch": 0.24498260636874497, "grad_norm": 22.36359214782715, "learning_rate": 7.55017393631255e-07, "loss": 0.5258, "step": 1831 }, { "epoch": 0.24511640353224512, "grad_norm": 23.95978355407715, "learning_rate": 7.548835964677548e-07, "loss": 0.6405, "step": 1832 }, { "epoch": 0.24525020069574524, "grad_norm": 31.186298370361328, "learning_rate": 7.547497993042547e-07, "loss": 0.4642, "step": 1833 }, { "epoch": 0.2453839978592454, "grad_norm": 25.34434700012207, "learning_rate": 7.546160021407546e-07, "loss": 0.4601, "step": 1834 }, { "epoch": 0.2455177950227455, "grad_norm": 24.070310592651367, "learning_rate": 7.544822049772545e-07, "loss": 0.642, "step": 1835 }, { "epoch": 0.24565159218624566, "grad_norm": 19.475536346435547, "learning_rate": 7.543484078137543e-07, "loss": 0.3852, "step": 1836 }, { "epoch": 0.24578538934974578, "grad_norm": 19.916175842285156, "learning_rate": 7.542146106502541e-07, "loss": 0.2494, "step": 1837 }, { "epoch": 0.24591918651324593, "grad_norm": 11.355189323425293, "learning_rate": 7.54080813486754e-07, "loss": 0.3568, "step": 1838 }, { "epoch": 0.24605298367674605, "grad_norm": 16.177736282348633, "learning_rate": 7.539470163232539e-07, "loss": 0.5691, "step": 1839 }, { "epoch": 0.2461867808402462, "grad_norm": 15.776646614074707, "learning_rate": 7.538132191597538e-07, "loss": 0.4808, "step": 1840 }, { "epoch": 0.24632057800374632, "grad_norm": 14.82050895690918, "learning_rate": 7.536794219962537e-07, "loss": 0.3473, "step": 1841 }, { "epoch": 0.24645437516724644, "grad_norm": 9.944158554077148, "learning_rate": 7.535456248327535e-07, "loss": 0.3487, "step": 1842 }, { "epoch": 0.2465881723307466, "grad_norm": 44.647483825683594, "learning_rate": 7.534118276692534e-07, "loss": 0.5964, "step": 1843 }, { "epoch": 0.2467219694942467, "grad_norm": 8.302223205566406, "learning_rate": 7.532780305057532e-07, "loss": 0.3706, "step": 1844 }, { "epoch": 0.24685576665774686, "grad_norm": 26.5152587890625, "learning_rate": 7.531442333422531e-07, "loss": 0.5386, "step": 1845 }, { "epoch": 0.24698956382124698, "grad_norm": 18.48996353149414, "learning_rate": 7.53010436178753e-07, "loss": 0.4239, "step": 1846 }, { "epoch": 0.24712336098474713, "grad_norm": 8.133636474609375, "learning_rate": 7.528766390152529e-07, "loss": 0.3122, "step": 1847 }, { "epoch": 0.24725715814824725, "grad_norm": 26.386451721191406, "learning_rate": 7.527428418517527e-07, "loss": 0.4417, "step": 1848 }, { "epoch": 0.2473909553117474, "grad_norm": 40.242164611816406, "learning_rate": 7.526090446882526e-07, "loss": 0.6245, "step": 1849 }, { "epoch": 0.24752475247524752, "grad_norm": 14.10327434539795, "learning_rate": 7.524752475247525e-07, "loss": 0.5386, "step": 1850 }, { "epoch": 0.24765854963874767, "grad_norm": 13.79064655303955, "learning_rate": 7.523414503612523e-07, "loss": 0.5534, "step": 1851 }, { "epoch": 0.2477923468022478, "grad_norm": 11.596872329711914, "learning_rate": 7.522076531977521e-07, "loss": 0.3182, "step": 1852 }, { "epoch": 0.2479261439657479, "grad_norm": 14.938848495483398, "learning_rate": 7.52073856034252e-07, "loss": 0.5927, "step": 1853 }, { "epoch": 0.24805994112924806, "grad_norm": 13.89151668548584, "learning_rate": 7.51940058870752e-07, "loss": 0.6422, "step": 1854 }, { "epoch": 0.24819373829274818, "grad_norm": 11.951791763305664, "learning_rate": 7.518062617072519e-07, "loss": 0.5269, "step": 1855 }, { "epoch": 0.24832753545624833, "grad_norm": 14.903364181518555, "learning_rate": 7.516724645437516e-07, "loss": 0.2932, "step": 1856 }, { "epoch": 0.24846133261974845, "grad_norm": 28.770917892456055, "learning_rate": 7.515386673802515e-07, "loss": 0.4335, "step": 1857 }, { "epoch": 0.2485951297832486, "grad_norm": 26.922840118408203, "learning_rate": 7.514048702167514e-07, "loss": 0.5519, "step": 1858 }, { "epoch": 0.24872892694674872, "grad_norm": 13.64877700805664, "learning_rate": 7.512710730532512e-07, "loss": 0.5211, "step": 1859 }, { "epoch": 0.24886272411024887, "grad_norm": 20.775583267211914, "learning_rate": 7.51137275889751e-07, "loss": 0.3554, "step": 1860 }, { "epoch": 0.248996521273749, "grad_norm": 9.774858474731445, "learning_rate": 7.510034787262509e-07, "loss": 0.5159, "step": 1861 }, { "epoch": 0.24913031843724914, "grad_norm": 23.00572395324707, "learning_rate": 7.508696815627509e-07, "loss": 0.3669, "step": 1862 }, { "epoch": 0.24926411560074926, "grad_norm": 15.69728946685791, "learning_rate": 7.507358843992507e-07, "loss": 0.6548, "step": 1863 }, { "epoch": 0.24939791276424939, "grad_norm": 9.970571517944336, "learning_rate": 7.506020872357506e-07, "loss": 0.4392, "step": 1864 }, { "epoch": 0.24953170992774953, "grad_norm": 14.107223510742188, "learning_rate": 7.504682900722504e-07, "loss": 0.3147, "step": 1865 }, { "epoch": 0.24966550709124966, "grad_norm": 11.124353408813477, "learning_rate": 7.503344929087503e-07, "loss": 0.4557, "step": 1866 }, { "epoch": 0.2497993042547498, "grad_norm": 13.974453926086426, "learning_rate": 7.502006957452501e-07, "loss": 0.5634, "step": 1867 }, { "epoch": 0.24993310141824993, "grad_norm": 35.88774871826172, "learning_rate": 7.5006689858175e-07, "loss": 0.7717, "step": 1868 }, { "epoch": 0.2500668985817501, "grad_norm": 7.297944068908691, "learning_rate": 7.4993310141825e-07, "loss": 0.4424, "step": 1869 }, { "epoch": 0.2502006957452502, "grad_norm": 10.702960968017578, "learning_rate": 7.497993042547498e-07, "loss": 0.5321, "step": 1870 }, { "epoch": 0.2503344929087503, "grad_norm": 27.11518096923828, "learning_rate": 7.496655070912496e-07, "loss": 0.5837, "step": 1871 }, { "epoch": 0.25046829007225047, "grad_norm": 17.513948440551758, "learning_rate": 7.495317099277495e-07, "loss": 0.451, "step": 1872 }, { "epoch": 0.2506020872357506, "grad_norm": 11.852067947387695, "learning_rate": 7.493979127642494e-07, "loss": 0.3961, "step": 1873 }, { "epoch": 0.25073588439925076, "grad_norm": 49.132991790771484, "learning_rate": 7.492641156007492e-07, "loss": 0.4655, "step": 1874 }, { "epoch": 0.25086968156275086, "grad_norm": 25.906070709228516, "learning_rate": 7.49130318437249e-07, "loss": 0.76, "step": 1875 }, { "epoch": 0.251003478726251, "grad_norm": 10.083237648010254, "learning_rate": 7.48996521273749e-07, "loss": 0.4308, "step": 1876 }, { "epoch": 0.25113727588975115, "grad_norm": 19.221912384033203, "learning_rate": 7.488627241102489e-07, "loss": 0.4686, "step": 1877 }, { "epoch": 0.25127107305325125, "grad_norm": 18.81747817993164, "learning_rate": 7.487289269467488e-07, "loss": 0.5391, "step": 1878 }, { "epoch": 0.2514048702167514, "grad_norm": 18.594219207763672, "learning_rate": 7.485951297832485e-07, "loss": 0.6506, "step": 1879 }, { "epoch": 0.25153866738025155, "grad_norm": 28.466222763061523, "learning_rate": 7.484613326197484e-07, "loss": 0.6504, "step": 1880 }, { "epoch": 0.2516724645437517, "grad_norm": 34.292388916015625, "learning_rate": 7.483275354562483e-07, "loss": 0.5526, "step": 1881 }, { "epoch": 0.2518062617072518, "grad_norm": 16.764541625976562, "learning_rate": 7.481937382927481e-07, "loss": 0.39, "step": 1882 }, { "epoch": 0.25194005887075194, "grad_norm": 32.49657440185547, "learning_rate": 7.48059941129248e-07, "loss": 0.3576, "step": 1883 }, { "epoch": 0.2520738560342521, "grad_norm": 8.235404014587402, "learning_rate": 7.479261439657479e-07, "loss": 0.4436, "step": 1884 }, { "epoch": 0.25220765319775224, "grad_norm": 13.509638786315918, "learning_rate": 7.477923468022478e-07, "loss": 0.5199, "step": 1885 }, { "epoch": 0.25234145036125233, "grad_norm": 12.68442440032959, "learning_rate": 7.476585496387476e-07, "loss": 0.4808, "step": 1886 }, { "epoch": 0.2524752475247525, "grad_norm": 15.492609024047852, "learning_rate": 7.475247524752475e-07, "loss": 0.3977, "step": 1887 }, { "epoch": 0.2526090446882526, "grad_norm": 22.747257232666016, "learning_rate": 7.473909553117473e-07, "loss": 0.4931, "step": 1888 }, { "epoch": 0.2527428418517527, "grad_norm": 12.102765083312988, "learning_rate": 7.472571581482472e-07, "loss": 0.4417, "step": 1889 }, { "epoch": 0.25287663901525287, "grad_norm": 11.466642379760742, "learning_rate": 7.47123360984747e-07, "loss": 0.528, "step": 1890 }, { "epoch": 0.253010436178753, "grad_norm": 13.369180679321289, "learning_rate": 7.46989563821247e-07, "loss": 0.5726, "step": 1891 }, { "epoch": 0.25314423334225317, "grad_norm": 35.52275848388672, "learning_rate": 7.468557666577469e-07, "loss": 0.56, "step": 1892 }, { "epoch": 0.25327803050575326, "grad_norm": 11.377416610717773, "learning_rate": 7.467219694942467e-07, "loss": 0.4961, "step": 1893 }, { "epoch": 0.2534118276692534, "grad_norm": 20.816993713378906, "learning_rate": 7.465881723307465e-07, "loss": 0.4924, "step": 1894 }, { "epoch": 0.25354562483275356, "grad_norm": 15.674543380737305, "learning_rate": 7.464543751672464e-07, "loss": 0.5294, "step": 1895 }, { "epoch": 0.2536794219962537, "grad_norm": 26.417579650878906, "learning_rate": 7.463205780037463e-07, "loss": 0.3584, "step": 1896 }, { "epoch": 0.2538132191597538, "grad_norm": 12.214055061340332, "learning_rate": 7.461867808402462e-07, "loss": 0.4416, "step": 1897 }, { "epoch": 0.25394701632325395, "grad_norm": 10.520315170288086, "learning_rate": 7.46052983676746e-07, "loss": 0.441, "step": 1898 }, { "epoch": 0.2540808134867541, "grad_norm": 24.555770874023438, "learning_rate": 7.459191865132459e-07, "loss": 0.4834, "step": 1899 }, { "epoch": 0.2542146106502542, "grad_norm": 21.61161994934082, "learning_rate": 7.457853893497458e-07, "loss": 0.5613, "step": 1900 }, { "epoch": 0.25434840781375434, "grad_norm": 14.099488258361816, "learning_rate": 7.456515921862457e-07, "loss": 0.5126, "step": 1901 }, { "epoch": 0.2544822049772545, "grad_norm": 23.068723678588867, "learning_rate": 7.455177950227454e-07, "loss": 0.4044, "step": 1902 }, { "epoch": 0.25461600214075464, "grad_norm": 24.027559280395508, "learning_rate": 7.453839978592453e-07, "loss": 0.5216, "step": 1903 }, { "epoch": 0.25474979930425473, "grad_norm": 9.544344902038574, "learning_rate": 7.452502006957452e-07, "loss": 0.4071, "step": 1904 }, { "epoch": 0.2548835964677549, "grad_norm": 20.157787322998047, "learning_rate": 7.451164035322451e-07, "loss": 0.4487, "step": 1905 }, { "epoch": 0.25501739363125503, "grad_norm": 29.541223526000977, "learning_rate": 7.44982606368745e-07, "loss": 0.3838, "step": 1906 }, { "epoch": 0.2551511907947552, "grad_norm": 20.09084701538086, "learning_rate": 7.448488092052448e-07, "loss": 0.3982, "step": 1907 }, { "epoch": 0.25528498795825527, "grad_norm": 20.86806869506836, "learning_rate": 7.447150120417447e-07, "loss": 0.5558, "step": 1908 }, { "epoch": 0.2554187851217554, "grad_norm": 24.344039916992188, "learning_rate": 7.445812148782445e-07, "loss": 0.3822, "step": 1909 }, { "epoch": 0.25555258228525557, "grad_norm": 30.635812759399414, "learning_rate": 7.444474177147444e-07, "loss": 0.6287, "step": 1910 }, { "epoch": 0.25568637944875566, "grad_norm": 16.032119750976562, "learning_rate": 7.443136205512443e-07, "loss": 0.5634, "step": 1911 }, { "epoch": 0.2558201766122558, "grad_norm": 11.715913772583008, "learning_rate": 7.441798233877441e-07, "loss": 0.3749, "step": 1912 }, { "epoch": 0.25595397377575596, "grad_norm": 17.729883193969727, "learning_rate": 7.44046026224244e-07, "loss": 0.4557, "step": 1913 }, { "epoch": 0.2560877709392561, "grad_norm": 13.51302719116211, "learning_rate": 7.439122290607439e-07, "loss": 0.4067, "step": 1914 }, { "epoch": 0.2562215681027562, "grad_norm": 13.89850902557373, "learning_rate": 7.437784318972438e-07, "loss": 0.4856, "step": 1915 }, { "epoch": 0.25635536526625635, "grad_norm": 29.9558162689209, "learning_rate": 7.436446347337436e-07, "loss": 0.2057, "step": 1916 }, { "epoch": 0.2564891624297565, "grad_norm": 15.489908218383789, "learning_rate": 7.435108375702434e-07, "loss": 0.5086, "step": 1917 }, { "epoch": 0.25662295959325665, "grad_norm": 40.31825637817383, "learning_rate": 7.433770404067433e-07, "loss": 0.5611, "step": 1918 }, { "epoch": 0.25675675675675674, "grad_norm": 35.7531852722168, "learning_rate": 7.432432432432432e-07, "loss": 0.5137, "step": 1919 }, { "epoch": 0.2568905539202569, "grad_norm": 34.15658187866211, "learning_rate": 7.431094460797432e-07, "loss": 0.3398, "step": 1920 }, { "epoch": 0.25702435108375704, "grad_norm": 14.255366325378418, "learning_rate": 7.429756489162429e-07, "loss": 0.6536, "step": 1921 }, { "epoch": 0.25715814824725713, "grad_norm": 36.965816497802734, "learning_rate": 7.428418517527428e-07, "loss": 0.6236, "step": 1922 }, { "epoch": 0.2572919454107573, "grad_norm": 16.56071662902832, "learning_rate": 7.427080545892427e-07, "loss": 0.4777, "step": 1923 }, { "epoch": 0.25742574257425743, "grad_norm": 33.140281677246094, "learning_rate": 7.425742574257426e-07, "loss": 0.4634, "step": 1924 }, { "epoch": 0.2575595397377576, "grad_norm": 13.242956161499023, "learning_rate": 7.424404602622424e-07, "loss": 0.5895, "step": 1925 }, { "epoch": 0.2576933369012577, "grad_norm": 12.626018524169922, "learning_rate": 7.423066630987422e-07, "loss": 0.4946, "step": 1926 }, { "epoch": 0.2578271340647578, "grad_norm": 26.025707244873047, "learning_rate": 7.421728659352421e-07, "loss": 0.4453, "step": 1927 }, { "epoch": 0.257960931228258, "grad_norm": 14.14207935333252, "learning_rate": 7.420390687717421e-07, "loss": 0.4773, "step": 1928 }, { "epoch": 0.2580947283917581, "grad_norm": 22.366731643676758, "learning_rate": 7.419052716082419e-07, "loss": 0.4227, "step": 1929 }, { "epoch": 0.2582285255552582, "grad_norm": 15.931800842285156, "learning_rate": 7.417714744447417e-07, "loss": 0.5653, "step": 1930 }, { "epoch": 0.25836232271875836, "grad_norm": 14.06508731842041, "learning_rate": 7.416376772812416e-07, "loss": 0.6097, "step": 1931 }, { "epoch": 0.2584961198822585, "grad_norm": 23.78263282775879, "learning_rate": 7.415038801177415e-07, "loss": 0.3966, "step": 1932 }, { "epoch": 0.2586299170457586, "grad_norm": 12.681154251098633, "learning_rate": 7.413700829542413e-07, "loss": 0.4912, "step": 1933 }, { "epoch": 0.25876371420925875, "grad_norm": 31.736906051635742, "learning_rate": 7.412362857907412e-07, "loss": 0.316, "step": 1934 }, { "epoch": 0.2588975113727589, "grad_norm": 15.419285774230957, "learning_rate": 7.411024886272411e-07, "loss": 0.5901, "step": 1935 }, { "epoch": 0.25903130853625905, "grad_norm": 12.219127655029297, "learning_rate": 7.409686914637409e-07, "loss": 0.4268, "step": 1936 }, { "epoch": 0.25916510569975915, "grad_norm": 12.247611045837402, "learning_rate": 7.408348943002408e-07, "loss": 0.3105, "step": 1937 }, { "epoch": 0.2592989028632593, "grad_norm": 35.68191909790039, "learning_rate": 7.407010971367407e-07, "loss": 0.7311, "step": 1938 }, { "epoch": 0.25943270002675944, "grad_norm": 22.79810905456543, "learning_rate": 7.405672999732406e-07, "loss": 0.5584, "step": 1939 }, { "epoch": 0.2595664971902596, "grad_norm": 17.455726623535156, "learning_rate": 7.404335028097403e-07, "loss": 0.4641, "step": 1940 }, { "epoch": 0.2597002943537597, "grad_norm": 12.386630058288574, "learning_rate": 7.402997056462402e-07, "loss": 0.5134, "step": 1941 }, { "epoch": 0.25983409151725984, "grad_norm": 16.65159034729004, "learning_rate": 7.401659084827402e-07, "loss": 0.4013, "step": 1942 }, { "epoch": 0.25996788868076, "grad_norm": 20.92789077758789, "learning_rate": 7.400321113192401e-07, "loss": 0.4971, "step": 1943 }, { "epoch": 0.2601016858442601, "grad_norm": 17.47957992553711, "learning_rate": 7.398983141557398e-07, "loss": 0.4925, "step": 1944 }, { "epoch": 0.2602354830077602, "grad_norm": 34.503570556640625, "learning_rate": 7.397645169922397e-07, "loss": 0.545, "step": 1945 }, { "epoch": 0.2603692801712604, "grad_norm": 16.189987182617188, "learning_rate": 7.396307198287396e-07, "loss": 0.3572, "step": 1946 }, { "epoch": 0.2605030773347605, "grad_norm": 25.687131881713867, "learning_rate": 7.394969226652395e-07, "loss": 0.3108, "step": 1947 }, { "epoch": 0.2606368744982606, "grad_norm": 21.20334815979004, "learning_rate": 7.393631255017393e-07, "loss": 0.5618, "step": 1948 }, { "epoch": 0.26077067166176077, "grad_norm": 10.089000701904297, "learning_rate": 7.392293283382391e-07, "loss": 0.4998, "step": 1949 }, { "epoch": 0.2609044688252609, "grad_norm": 10.440918922424316, "learning_rate": 7.390955311747391e-07, "loss": 0.3521, "step": 1950 }, { "epoch": 0.26103826598876106, "grad_norm": 10.182151794433594, "learning_rate": 7.38961734011239e-07, "loss": 0.4132, "step": 1951 }, { "epoch": 0.26117206315226116, "grad_norm": 10.262124061584473, "learning_rate": 7.388279368477388e-07, "loss": 0.4286, "step": 1952 }, { "epoch": 0.2613058603157613, "grad_norm": 15.053672790527344, "learning_rate": 7.386941396842387e-07, "loss": 0.348, "step": 1953 }, { "epoch": 0.26143965747926146, "grad_norm": 9.356261253356934, "learning_rate": 7.385603425207385e-07, "loss": 0.3269, "step": 1954 }, { "epoch": 0.26157345464276155, "grad_norm": 13.887105941772461, "learning_rate": 7.384265453572384e-07, "loss": 0.4539, "step": 1955 }, { "epoch": 0.2617072518062617, "grad_norm": 29.812602996826172, "learning_rate": 7.382927481937382e-07, "loss": 0.5726, "step": 1956 }, { "epoch": 0.26184104896976185, "grad_norm": 29.96648406982422, "learning_rate": 7.381589510302382e-07, "loss": 0.4808, "step": 1957 }, { "epoch": 0.261974846133262, "grad_norm": 15.466158866882324, "learning_rate": 7.38025153866738e-07, "loss": 0.3956, "step": 1958 }, { "epoch": 0.2621086432967621, "grad_norm": 19.334522247314453, "learning_rate": 7.378913567032379e-07, "loss": 0.5562, "step": 1959 }, { "epoch": 0.26224244046026224, "grad_norm": 14.593931198120117, "learning_rate": 7.377575595397377e-07, "loss": 0.1963, "step": 1960 }, { "epoch": 0.2623762376237624, "grad_norm": 23.222562789916992, "learning_rate": 7.376237623762376e-07, "loss": 0.4245, "step": 1961 }, { "epoch": 0.26251003478726254, "grad_norm": 18.69036293029785, "learning_rate": 7.374899652127375e-07, "loss": 0.4157, "step": 1962 }, { "epoch": 0.26264383195076263, "grad_norm": 17.610414505004883, "learning_rate": 7.373561680492372e-07, "loss": 0.5336, "step": 1963 }, { "epoch": 0.2627776291142628, "grad_norm": 16.56601333618164, "learning_rate": 7.372223708857372e-07, "loss": 0.3739, "step": 1964 }, { "epoch": 0.2629114262777629, "grad_norm": 20.633459091186523, "learning_rate": 7.370885737222371e-07, "loss": 0.5685, "step": 1965 }, { "epoch": 0.263045223441263, "grad_norm": 19.95663833618164, "learning_rate": 7.36954776558737e-07, "loss": 0.3845, "step": 1966 }, { "epoch": 0.26317902060476317, "grad_norm": 27.54633140563965, "learning_rate": 7.368209793952368e-07, "loss": 0.4128, "step": 1967 }, { "epoch": 0.2633128177682633, "grad_norm": 16.220535278320312, "learning_rate": 7.366871822317366e-07, "loss": 0.4953, "step": 1968 }, { "epoch": 0.26344661493176347, "grad_norm": 14.929316520690918, "learning_rate": 7.365533850682365e-07, "loss": 0.5406, "step": 1969 }, { "epoch": 0.26358041209526356, "grad_norm": 29.111774444580078, "learning_rate": 7.364195879047364e-07, "loss": 0.4722, "step": 1970 }, { "epoch": 0.2637142092587637, "grad_norm": 13.621489524841309, "learning_rate": 7.362857907412362e-07, "loss": 0.3137, "step": 1971 }, { "epoch": 0.26384800642226386, "grad_norm": 19.696372985839844, "learning_rate": 7.361519935777361e-07, "loss": 0.5148, "step": 1972 }, { "epoch": 0.263981803585764, "grad_norm": 28.64935874938965, "learning_rate": 7.36018196414236e-07, "loss": 0.4262, "step": 1973 }, { "epoch": 0.2641156007492641, "grad_norm": 24.478824615478516, "learning_rate": 7.358843992507359e-07, "loss": 0.4659, "step": 1974 }, { "epoch": 0.26424939791276425, "grad_norm": 31.343151092529297, "learning_rate": 7.357506020872357e-07, "loss": 0.6602, "step": 1975 }, { "epoch": 0.2643831950762644, "grad_norm": 27.709291458129883, "learning_rate": 7.356168049237356e-07, "loss": 0.3525, "step": 1976 }, { "epoch": 0.2645169922397645, "grad_norm": 24.92307472229004, "learning_rate": 7.354830077602354e-07, "loss": 0.6136, "step": 1977 }, { "epoch": 0.26465078940326464, "grad_norm": 10.415827751159668, "learning_rate": 7.353492105967353e-07, "loss": 0.3697, "step": 1978 }, { "epoch": 0.2647845865667648, "grad_norm": 19.81089973449707, "learning_rate": 7.352154134332352e-07, "loss": 0.5334, "step": 1979 }, { "epoch": 0.26491838373026494, "grad_norm": 35.893802642822266, "learning_rate": 7.350816162697351e-07, "loss": 0.5775, "step": 1980 }, { "epoch": 0.26505218089376503, "grad_norm": 19.713367462158203, "learning_rate": 7.34947819106235e-07, "loss": 0.7589, "step": 1981 }, { "epoch": 0.2651859780572652, "grad_norm": 12.571880340576172, "learning_rate": 7.348140219427348e-07, "loss": 0.405, "step": 1982 }, { "epoch": 0.26531977522076533, "grad_norm": 15.117097854614258, "learning_rate": 7.346802247792346e-07, "loss": 0.5002, "step": 1983 }, { "epoch": 0.2654535723842655, "grad_norm": 28.64021110534668, "learning_rate": 7.345464276157345e-07, "loss": 0.3499, "step": 1984 }, { "epoch": 0.2655873695477656, "grad_norm": 36.28199005126953, "learning_rate": 7.344126304522344e-07, "loss": 0.3872, "step": 1985 }, { "epoch": 0.2657211667112657, "grad_norm": 13.618128776550293, "learning_rate": 7.342788332887344e-07, "loss": 0.4665, "step": 1986 }, { "epoch": 0.26585496387476587, "grad_norm": 10.020683288574219, "learning_rate": 7.341450361252341e-07, "loss": 0.4467, "step": 1987 }, { "epoch": 0.26598876103826596, "grad_norm": 31.487348556518555, "learning_rate": 7.34011238961734e-07, "loss": 0.4231, "step": 1988 }, { "epoch": 0.2661225582017661, "grad_norm": 27.58275032043457, "learning_rate": 7.338774417982339e-07, "loss": 0.6299, "step": 1989 }, { "epoch": 0.26625635536526626, "grad_norm": 21.76142692565918, "learning_rate": 7.337436446347337e-07, "loss": 0.5621, "step": 1990 }, { "epoch": 0.2663901525287664, "grad_norm": 16.088815689086914, "learning_rate": 7.336098474712335e-07, "loss": 0.6776, "step": 1991 }, { "epoch": 0.2665239496922665, "grad_norm": 47.56387710571289, "learning_rate": 7.334760503077334e-07, "loss": 0.5361, "step": 1992 }, { "epoch": 0.26665774685576665, "grad_norm": 27.059223175048828, "learning_rate": 7.333422531442333e-07, "loss": 0.4533, "step": 1993 }, { "epoch": 0.2667915440192668, "grad_norm": 15.649812698364258, "learning_rate": 7.332084559807332e-07, "loss": 0.3208, "step": 1994 }, { "epoch": 0.26692534118276695, "grad_norm": 21.210927963256836, "learning_rate": 7.33074658817233e-07, "loss": 0.3482, "step": 1995 }, { "epoch": 0.26705913834626704, "grad_norm": 16.386308670043945, "learning_rate": 7.329408616537329e-07, "loss": 0.4214, "step": 1996 }, { "epoch": 0.2671929355097672, "grad_norm": 14.429173469543457, "learning_rate": 7.328070644902328e-07, "loss": 0.5587, "step": 1997 }, { "epoch": 0.26732673267326734, "grad_norm": 12.599518775939941, "learning_rate": 7.326732673267326e-07, "loss": 0.5778, "step": 1998 }, { "epoch": 0.26746052983676744, "grad_norm": 9.659900665283203, "learning_rate": 7.325394701632325e-07, "loss": 0.4551, "step": 1999 }, { "epoch": 0.2675943270002676, "grad_norm": 26.296754837036133, "learning_rate": 7.324056729997323e-07, "loss": 0.4094, "step": 2000 }, { "epoch": 0.26772812416376773, "grad_norm": 23.392566680908203, "learning_rate": 7.322718758362323e-07, "loss": 0.5633, "step": 2001 }, { "epoch": 0.2678619213272679, "grad_norm": 41.098297119140625, "learning_rate": 7.321380786727321e-07, "loss": 0.6196, "step": 2002 }, { "epoch": 0.267995718490768, "grad_norm": 29.598628997802734, "learning_rate": 7.32004281509232e-07, "loss": 0.5884, "step": 2003 }, { "epoch": 0.2681295156542681, "grad_norm": 16.880578994750977, "learning_rate": 7.318704843457319e-07, "loss": 0.5218, "step": 2004 }, { "epoch": 0.2682633128177683, "grad_norm": 28.776445388793945, "learning_rate": 7.317366871822317e-07, "loss": 0.5238, "step": 2005 }, { "epoch": 0.2683971099812684, "grad_norm": 23.13021469116211, "learning_rate": 7.316028900187315e-07, "loss": 0.4927, "step": 2006 }, { "epoch": 0.2685309071447685, "grad_norm": 25.899282455444336, "learning_rate": 7.314690928552314e-07, "loss": 0.3753, "step": 2007 }, { "epoch": 0.26866470430826866, "grad_norm": 8.862085342407227, "learning_rate": 7.313352956917314e-07, "loss": 0.4382, "step": 2008 }, { "epoch": 0.2687985014717688, "grad_norm": 16.443496704101562, "learning_rate": 7.312014985282313e-07, "loss": 0.378, "step": 2009 }, { "epoch": 0.2689322986352689, "grad_norm": 16.24243927001953, "learning_rate": 7.31067701364731e-07, "loss": 0.4806, "step": 2010 }, { "epoch": 0.26906609579876906, "grad_norm": 41.89490509033203, "learning_rate": 7.309339042012309e-07, "loss": 0.5062, "step": 2011 }, { "epoch": 0.2691998929622692, "grad_norm": 13.471122741699219, "learning_rate": 7.308001070377308e-07, "loss": 0.6573, "step": 2012 }, { "epoch": 0.26933369012576935, "grad_norm": 26.823013305664062, "learning_rate": 7.306663098742306e-07, "loss": 0.5483, "step": 2013 }, { "epoch": 0.26946748728926945, "grad_norm": 40.643428802490234, "learning_rate": 7.305325127107304e-07, "loss": 0.454, "step": 2014 }, { "epoch": 0.2696012844527696, "grad_norm": 19.372272491455078, "learning_rate": 7.303987155472303e-07, "loss": 0.5764, "step": 2015 }, { "epoch": 0.26973508161626975, "grad_norm": 27.429990768432617, "learning_rate": 7.302649183837303e-07, "loss": 0.6201, "step": 2016 }, { "epoch": 0.2698688787797699, "grad_norm": 15.613408088684082, "learning_rate": 7.301311212202301e-07, "loss": 0.45, "step": 2017 }, { "epoch": 0.27000267594327, "grad_norm": 36.5470085144043, "learning_rate": 7.2999732405673e-07, "loss": 0.6447, "step": 2018 }, { "epoch": 0.27013647310677014, "grad_norm": 16.627979278564453, "learning_rate": 7.298635268932298e-07, "loss": 0.4547, "step": 2019 }, { "epoch": 0.2702702702702703, "grad_norm": 11.185113906860352, "learning_rate": 7.297297297297297e-07, "loss": 0.4886, "step": 2020 }, { "epoch": 0.2704040674337704, "grad_norm": 10.718931198120117, "learning_rate": 7.295959325662295e-07, "loss": 0.4713, "step": 2021 }, { "epoch": 0.2705378645972705, "grad_norm": 13.794297218322754, "learning_rate": 7.294621354027294e-07, "loss": 0.5894, "step": 2022 }, { "epoch": 0.2706716617607707, "grad_norm": 12.850915908813477, "learning_rate": 7.293283382392294e-07, "loss": 0.5609, "step": 2023 }, { "epoch": 0.2708054589242708, "grad_norm": 13.927427291870117, "learning_rate": 7.291945410757292e-07, "loss": 0.4964, "step": 2024 }, { "epoch": 0.2709392560877709, "grad_norm": 14.419535636901855, "learning_rate": 7.29060743912229e-07, "loss": 0.2464, "step": 2025 }, { "epoch": 0.27107305325127107, "grad_norm": 12.46084976196289, "learning_rate": 7.289269467487289e-07, "loss": 0.3202, "step": 2026 }, { "epoch": 0.2712068504147712, "grad_norm": 17.55447769165039, "learning_rate": 7.287931495852288e-07, "loss": 0.4682, "step": 2027 }, { "epoch": 0.27134064757827137, "grad_norm": 31.0543212890625, "learning_rate": 7.286593524217286e-07, "loss": 0.589, "step": 2028 }, { "epoch": 0.27147444474177146, "grad_norm": 15.628173828125, "learning_rate": 7.285255552582284e-07, "loss": 0.6081, "step": 2029 }, { "epoch": 0.2716082419052716, "grad_norm": 10.67628002166748, "learning_rate": 7.283917580947283e-07, "loss": 0.4344, "step": 2030 }, { "epoch": 0.27174203906877176, "grad_norm": 16.443660736083984, "learning_rate": 7.282579609312283e-07, "loss": 0.5059, "step": 2031 }, { "epoch": 0.27187583623227185, "grad_norm": 16.577632904052734, "learning_rate": 7.281241637677282e-07, "loss": 0.3694, "step": 2032 }, { "epoch": 0.272009633395772, "grad_norm": 13.935458183288574, "learning_rate": 7.279903666042279e-07, "loss": 0.5009, "step": 2033 }, { "epoch": 0.27214343055927215, "grad_norm": 29.214305877685547, "learning_rate": 7.278565694407278e-07, "loss": 0.6758, "step": 2034 }, { "epoch": 0.2722772277227723, "grad_norm": 11.558882713317871, "learning_rate": 7.277227722772277e-07, "loss": 0.4743, "step": 2035 }, { "epoch": 0.2724110248862724, "grad_norm": 31.295917510986328, "learning_rate": 7.275889751137276e-07, "loss": 0.2635, "step": 2036 }, { "epoch": 0.27254482204977254, "grad_norm": 10.16881275177002, "learning_rate": 7.274551779502273e-07, "loss": 0.3691, "step": 2037 }, { "epoch": 0.2726786192132727, "grad_norm": 31.520095825195312, "learning_rate": 7.273213807867273e-07, "loss": 0.4874, "step": 2038 }, { "epoch": 0.27281241637677284, "grad_norm": 11.656639099121094, "learning_rate": 7.271875836232272e-07, "loss": 0.4212, "step": 2039 }, { "epoch": 0.27294621354027293, "grad_norm": 25.024213790893555, "learning_rate": 7.27053786459727e-07, "loss": 0.5977, "step": 2040 }, { "epoch": 0.2730800107037731, "grad_norm": 8.37231159210205, "learning_rate": 7.269199892962269e-07, "loss": 0.3646, "step": 2041 }, { "epoch": 0.27321380786727323, "grad_norm": 24.967458724975586, "learning_rate": 7.267861921327267e-07, "loss": 0.5476, "step": 2042 }, { "epoch": 0.2733476050307733, "grad_norm": 19.606605529785156, "learning_rate": 7.266523949692266e-07, "loss": 0.4227, "step": 2043 }, { "epoch": 0.27348140219427347, "grad_norm": 26.83592987060547, "learning_rate": 7.265185978057264e-07, "loss": 0.4763, "step": 2044 }, { "epoch": 0.2736151993577736, "grad_norm": 34.52701950073242, "learning_rate": 7.263848006422264e-07, "loss": 0.436, "step": 2045 }, { "epoch": 0.27374899652127377, "grad_norm": 31.176862716674805, "learning_rate": 7.262510034787263e-07, "loss": 0.2564, "step": 2046 }, { "epoch": 0.27388279368477386, "grad_norm": 19.44671630859375, "learning_rate": 7.261172063152261e-07, "loss": 0.6097, "step": 2047 }, { "epoch": 0.274016590848274, "grad_norm": 11.859858512878418, "learning_rate": 7.259834091517259e-07, "loss": 0.3976, "step": 2048 }, { "epoch": 0.27415038801177416, "grad_norm": 14.232270240783691, "learning_rate": 7.258496119882258e-07, "loss": 0.4575, "step": 2049 }, { "epoch": 0.2742841851752743, "grad_norm": 11.551427841186523, "learning_rate": 7.257158148247257e-07, "loss": 0.3247, "step": 2050 }, { "epoch": 0.2744179823387744, "grad_norm": 29.408470153808594, "learning_rate": 7.255820176612256e-07, "loss": 0.3538, "step": 2051 }, { "epoch": 0.27455177950227455, "grad_norm": 19.203655242919922, "learning_rate": 7.254482204977253e-07, "loss": 0.445, "step": 2052 }, { "epoch": 0.2746855766657747, "grad_norm": 18.157817840576172, "learning_rate": 7.253144233342253e-07, "loss": 0.3938, "step": 2053 }, { "epoch": 0.2748193738292748, "grad_norm": 20.820066452026367, "learning_rate": 7.251806261707252e-07, "loss": 0.5055, "step": 2054 }, { "epoch": 0.27495317099277494, "grad_norm": 22.43090057373047, "learning_rate": 7.250468290072251e-07, "loss": 0.6319, "step": 2055 }, { "epoch": 0.2750869681562751, "grad_norm": 34.59203338623047, "learning_rate": 7.249130318437248e-07, "loss": 0.6391, "step": 2056 }, { "epoch": 0.27522076531977524, "grad_norm": 23.132831573486328, "learning_rate": 7.247792346802247e-07, "loss": 0.367, "step": 2057 }, { "epoch": 0.27535456248327533, "grad_norm": 15.783576011657715, "learning_rate": 7.246454375167246e-07, "loss": 0.4428, "step": 2058 }, { "epoch": 0.2754883596467755, "grad_norm": 19.266481399536133, "learning_rate": 7.245116403532245e-07, "loss": 0.5692, "step": 2059 }, { "epoch": 0.27562215681027563, "grad_norm": 21.02212905883789, "learning_rate": 7.243778431897244e-07, "loss": 0.5769, "step": 2060 }, { "epoch": 0.2757559539737758, "grad_norm": 10.147848129272461, "learning_rate": 7.242440460262242e-07, "loss": 0.4175, "step": 2061 }, { "epoch": 0.2758897511372759, "grad_norm": 15.47460651397705, "learning_rate": 7.241102488627241e-07, "loss": 0.5209, "step": 2062 }, { "epoch": 0.276023548300776, "grad_norm": 22.56381607055664, "learning_rate": 7.23976451699224e-07, "loss": 0.3303, "step": 2063 }, { "epoch": 0.27615734546427617, "grad_norm": 25.67489242553711, "learning_rate": 7.238426545357238e-07, "loss": 0.6138, "step": 2064 }, { "epoch": 0.27629114262777626, "grad_norm": 18.0056095123291, "learning_rate": 7.237088573722236e-07, "loss": 0.4501, "step": 2065 }, { "epoch": 0.2764249397912764, "grad_norm": 34.710479736328125, "learning_rate": 7.235750602087235e-07, "loss": 0.2997, "step": 2066 }, { "epoch": 0.27655873695477656, "grad_norm": 11.497068405151367, "learning_rate": 7.234412630452234e-07, "loss": 0.3782, "step": 2067 }, { "epoch": 0.2766925341182767, "grad_norm": 37.77098083496094, "learning_rate": 7.233074658817233e-07, "loss": 0.6833, "step": 2068 }, { "epoch": 0.2768263312817768, "grad_norm": 19.906749725341797, "learning_rate": 7.231736687182232e-07, "loss": 0.3507, "step": 2069 }, { "epoch": 0.27696012844527695, "grad_norm": 27.230512619018555, "learning_rate": 7.23039871554723e-07, "loss": 0.4226, "step": 2070 }, { "epoch": 0.2770939256087771, "grad_norm": 15.036541938781738, "learning_rate": 7.229060743912228e-07, "loss": 0.3749, "step": 2071 }, { "epoch": 0.27722772277227725, "grad_norm": 17.78680419921875, "learning_rate": 7.227722772277227e-07, "loss": 0.4151, "step": 2072 }, { "epoch": 0.27736151993577735, "grad_norm": 12.360733985900879, "learning_rate": 7.226384800642226e-07, "loss": 0.3471, "step": 2073 }, { "epoch": 0.2774953170992775, "grad_norm": 30.827835083007812, "learning_rate": 7.225046829007225e-07, "loss": 0.5648, "step": 2074 }, { "epoch": 0.27762911426277764, "grad_norm": 18.7376708984375, "learning_rate": 7.223708857372223e-07, "loss": 0.4324, "step": 2075 }, { "epoch": 0.27776291142627774, "grad_norm": 37.57206726074219, "learning_rate": 7.222370885737222e-07, "loss": 0.669, "step": 2076 }, { "epoch": 0.2778967085897779, "grad_norm": 14.675695419311523, "learning_rate": 7.221032914102221e-07, "loss": 0.3756, "step": 2077 }, { "epoch": 0.27803050575327803, "grad_norm": 14.762290000915527, "learning_rate": 7.21969494246722e-07, "loss": 0.5128, "step": 2078 }, { "epoch": 0.2781643029167782, "grad_norm": 19.5593204498291, "learning_rate": 7.218356970832217e-07, "loss": 0.4014, "step": 2079 }, { "epoch": 0.2782981000802783, "grad_norm": 18.329837799072266, "learning_rate": 7.217018999197216e-07, "loss": 0.5838, "step": 2080 }, { "epoch": 0.2784318972437784, "grad_norm": 8.208239555358887, "learning_rate": 7.215681027562215e-07, "loss": 0.3834, "step": 2081 }, { "epoch": 0.2785656944072786, "grad_norm": 28.173877716064453, "learning_rate": 7.214343055927215e-07, "loss": 0.644, "step": 2082 }, { "epoch": 0.2786994915707787, "grad_norm": 27.992835998535156, "learning_rate": 7.213005084292213e-07, "loss": 0.5227, "step": 2083 }, { "epoch": 0.2788332887342788, "grad_norm": 12.613121032714844, "learning_rate": 7.211667112657211e-07, "loss": 0.357, "step": 2084 }, { "epoch": 0.27896708589777897, "grad_norm": 12.329477310180664, "learning_rate": 7.21032914102221e-07, "loss": 0.3451, "step": 2085 }, { "epoch": 0.2791008830612791, "grad_norm": 15.634750366210938, "learning_rate": 7.208991169387209e-07, "loss": 0.477, "step": 2086 }, { "epoch": 0.2792346802247792, "grad_norm": 19.144041061401367, "learning_rate": 7.207653197752207e-07, "loss": 0.379, "step": 2087 }, { "epoch": 0.27936847738827936, "grad_norm": 41.31723403930664, "learning_rate": 7.206315226117206e-07, "loss": 0.5361, "step": 2088 }, { "epoch": 0.2795022745517795, "grad_norm": 14.98243236541748, "learning_rate": 7.204977254482205e-07, "loss": 0.3827, "step": 2089 }, { "epoch": 0.27963607171527965, "grad_norm": 21.119152069091797, "learning_rate": 7.203639282847204e-07, "loss": 0.3185, "step": 2090 }, { "epoch": 0.27976986887877975, "grad_norm": 27.910566329956055, "learning_rate": 7.202301311212202e-07, "loss": 0.5991, "step": 2091 }, { "epoch": 0.2799036660422799, "grad_norm": 21.716032028198242, "learning_rate": 7.200963339577201e-07, "loss": 0.4752, "step": 2092 }, { "epoch": 0.28003746320578005, "grad_norm": 10.478561401367188, "learning_rate": 7.1996253679422e-07, "loss": 0.268, "step": 2093 }, { "epoch": 0.2801712603692802, "grad_norm": 14.794822692871094, "learning_rate": 7.198287396307197e-07, "loss": 0.4462, "step": 2094 }, { "epoch": 0.2803050575327803, "grad_norm": 41.525634765625, "learning_rate": 7.196949424672196e-07, "loss": 0.647, "step": 2095 }, { "epoch": 0.28043885469628044, "grad_norm": 12.760453224182129, "learning_rate": 7.195611453037195e-07, "loss": 0.4738, "step": 2096 }, { "epoch": 0.2805726518597806, "grad_norm": 14.574471473693848, "learning_rate": 7.194273481402195e-07, "loss": 0.4039, "step": 2097 }, { "epoch": 0.2807064490232807, "grad_norm": 24.31338119506836, "learning_rate": 7.192935509767192e-07, "loss": 0.2664, "step": 2098 }, { "epoch": 0.28084024618678083, "grad_norm": 21.093441009521484, "learning_rate": 7.191597538132191e-07, "loss": 0.3372, "step": 2099 }, { "epoch": 0.280974043350281, "grad_norm": 28.65317726135254, "learning_rate": 7.19025956649719e-07, "loss": 0.5312, "step": 2100 }, { "epoch": 0.2811078405137811, "grad_norm": 15.461917877197266, "learning_rate": 7.188921594862189e-07, "loss": 0.3781, "step": 2101 }, { "epoch": 0.2812416376772812, "grad_norm": 20.215988159179688, "learning_rate": 7.187583623227187e-07, "loss": 0.6006, "step": 2102 }, { "epoch": 0.28137543484078137, "grad_norm": 26.26686668395996, "learning_rate": 7.186245651592185e-07, "loss": 0.5244, "step": 2103 }, { "epoch": 0.2815092320042815, "grad_norm": 20.713237762451172, "learning_rate": 7.184907679957185e-07, "loss": 0.5075, "step": 2104 }, { "epoch": 0.28164302916778167, "grad_norm": 15.710198402404785, "learning_rate": 7.183569708322184e-07, "loss": 0.2705, "step": 2105 }, { "epoch": 0.28177682633128176, "grad_norm": 24.8763427734375, "learning_rate": 7.182231736687182e-07, "loss": 0.3841, "step": 2106 }, { "epoch": 0.2819106234947819, "grad_norm": 13.817499160766602, "learning_rate": 7.18089376505218e-07, "loss": 0.4852, "step": 2107 }, { "epoch": 0.28204442065828206, "grad_norm": 19.539718627929688, "learning_rate": 7.179555793417179e-07, "loss": 0.4825, "step": 2108 }, { "epoch": 0.28217821782178215, "grad_norm": 28.812808990478516, "learning_rate": 7.178217821782178e-07, "loss": 0.3664, "step": 2109 }, { "epoch": 0.2823120149852823, "grad_norm": 26.69768524169922, "learning_rate": 7.176879850147176e-07, "loss": 0.6404, "step": 2110 }, { "epoch": 0.28244581214878245, "grad_norm": 16.621387481689453, "learning_rate": 7.175541878512176e-07, "loss": 0.3679, "step": 2111 }, { "epoch": 0.2825796093122826, "grad_norm": 27.260663986206055, "learning_rate": 7.174203906877174e-07, "loss": 0.3663, "step": 2112 }, { "epoch": 0.2827134064757827, "grad_norm": 15.827064514160156, "learning_rate": 7.172865935242173e-07, "loss": 0.5014, "step": 2113 }, { "epoch": 0.28284720363928284, "grad_norm": 19.535667419433594, "learning_rate": 7.171527963607171e-07, "loss": 0.525, "step": 2114 }, { "epoch": 0.282981000802783, "grad_norm": 33.07583236694336, "learning_rate": 7.17018999197217e-07, "loss": 0.3363, "step": 2115 }, { "epoch": 0.28311479796628314, "grad_norm": 23.216806411743164, "learning_rate": 7.168852020337169e-07, "loss": 0.5885, "step": 2116 }, { "epoch": 0.28324859512978323, "grad_norm": 19.8622989654541, "learning_rate": 7.167514048702166e-07, "loss": 0.3444, "step": 2117 }, { "epoch": 0.2833823922932834, "grad_norm": 33.6419563293457, "learning_rate": 7.166176077067165e-07, "loss": 0.6197, "step": 2118 }, { "epoch": 0.28351618945678353, "grad_norm": 24.38701057434082, "learning_rate": 7.164838105432165e-07, "loss": 0.3584, "step": 2119 }, { "epoch": 0.2836499866202836, "grad_norm": 13.230379104614258, "learning_rate": 7.163500133797164e-07, "loss": 0.3584, "step": 2120 }, { "epoch": 0.28378378378378377, "grad_norm": 39.40768814086914, "learning_rate": 7.162162162162161e-07, "loss": 0.8081, "step": 2121 }, { "epoch": 0.2839175809472839, "grad_norm": 17.3467960357666, "learning_rate": 7.16082419052716e-07, "loss": 0.5882, "step": 2122 }, { "epoch": 0.28405137811078407, "grad_norm": 16.982681274414062, "learning_rate": 7.159486218892159e-07, "loss": 0.5099, "step": 2123 }, { "epoch": 0.28418517527428416, "grad_norm": 11.852197647094727, "learning_rate": 7.158148247257158e-07, "loss": 0.4207, "step": 2124 }, { "epoch": 0.2843189724377843, "grad_norm": 12.500181198120117, "learning_rate": 7.156810275622156e-07, "loss": 0.5418, "step": 2125 }, { "epoch": 0.28445276960128446, "grad_norm": 29.710519790649414, "learning_rate": 7.155472303987155e-07, "loss": 0.3189, "step": 2126 }, { "epoch": 0.2845865667647846, "grad_norm": 9.55387020111084, "learning_rate": 7.154134332352154e-07, "loss": 0.2678, "step": 2127 }, { "epoch": 0.2847203639282847, "grad_norm": 23.342926025390625, "learning_rate": 7.152796360717153e-07, "loss": 0.6627, "step": 2128 }, { "epoch": 0.28485416109178485, "grad_norm": 15.570130348205566, "learning_rate": 7.151458389082151e-07, "loss": 0.3667, "step": 2129 }, { "epoch": 0.284987958255285, "grad_norm": 21.769731521606445, "learning_rate": 7.15012041744715e-07, "loss": 0.3945, "step": 2130 }, { "epoch": 0.2851217554187851, "grad_norm": 19.177297592163086, "learning_rate": 7.148782445812148e-07, "loss": 0.2978, "step": 2131 }, { "epoch": 0.28525555258228524, "grad_norm": 29.303203582763672, "learning_rate": 7.147444474177147e-07, "loss": 0.6424, "step": 2132 }, { "epoch": 0.2853893497457854, "grad_norm": 16.067501068115234, "learning_rate": 7.146106502542146e-07, "loss": 0.4995, "step": 2133 }, { "epoch": 0.28552314690928554, "grad_norm": 17.815067291259766, "learning_rate": 7.144768530907145e-07, "loss": 0.3632, "step": 2134 }, { "epoch": 0.28565694407278563, "grad_norm": 36.4736213684082, "learning_rate": 7.143430559272144e-07, "loss": 0.5371, "step": 2135 }, { "epoch": 0.2857907412362858, "grad_norm": 23.38298797607422, "learning_rate": 7.142092587637142e-07, "loss": 0.5623, "step": 2136 }, { "epoch": 0.28592453839978593, "grad_norm": 13.740659713745117, "learning_rate": 7.14075461600214e-07, "loss": 0.3912, "step": 2137 }, { "epoch": 0.2860583355632861, "grad_norm": 10.900981903076172, "learning_rate": 7.139416644367139e-07, "loss": 0.4421, "step": 2138 }, { "epoch": 0.2861921327267862, "grad_norm": 15.455164909362793, "learning_rate": 7.138078672732138e-07, "loss": 0.385, "step": 2139 }, { "epoch": 0.2863259298902863, "grad_norm": 17.89333724975586, "learning_rate": 7.136740701097136e-07, "loss": 0.6294, "step": 2140 }, { "epoch": 0.2864597270537865, "grad_norm": 14.185556411743164, "learning_rate": 7.135402729462135e-07, "loss": 0.3891, "step": 2141 }, { "epoch": 0.28659352421728657, "grad_norm": 10.945001602172852, "learning_rate": 7.134064757827134e-07, "loss": 0.3255, "step": 2142 }, { "epoch": 0.2867273213807867, "grad_norm": 46.803466796875, "learning_rate": 7.132726786192133e-07, "loss": 0.7144, "step": 2143 }, { "epoch": 0.28686111854428686, "grad_norm": 8.797143936157227, "learning_rate": 7.13138881455713e-07, "loss": 0.4673, "step": 2144 }, { "epoch": 0.286994915707787, "grad_norm": 23.399229049682617, "learning_rate": 7.130050842922129e-07, "loss": 0.6645, "step": 2145 }, { "epoch": 0.2871287128712871, "grad_norm": 33.98068618774414, "learning_rate": 7.128712871287128e-07, "loss": 0.2758, "step": 2146 }, { "epoch": 0.28726251003478726, "grad_norm": 28.39945411682129, "learning_rate": 7.127374899652127e-07, "loss": 0.4668, "step": 2147 }, { "epoch": 0.2873963071982874, "grad_norm": 34.92594909667969, "learning_rate": 7.126036928017126e-07, "loss": 0.5643, "step": 2148 }, { "epoch": 0.28753010436178755, "grad_norm": 22.45669937133789, "learning_rate": 7.124698956382124e-07, "loss": 0.4482, "step": 2149 }, { "epoch": 0.28766390152528765, "grad_norm": 15.95096492767334, "learning_rate": 7.123360984747123e-07, "loss": 0.4877, "step": 2150 }, { "epoch": 0.2877976986887878, "grad_norm": 34.07988357543945, "learning_rate": 7.122023013112122e-07, "loss": 0.4424, "step": 2151 }, { "epoch": 0.28793149585228794, "grad_norm": 33.776607513427734, "learning_rate": 7.12068504147712e-07, "loss": 0.5452, "step": 2152 }, { "epoch": 0.28806529301578804, "grad_norm": 12.60905933380127, "learning_rate": 7.119347069842119e-07, "loss": 0.4514, "step": 2153 }, { "epoch": 0.2881990901792882, "grad_norm": 31.9103946685791, "learning_rate": 7.118009098207117e-07, "loss": 0.4055, "step": 2154 }, { "epoch": 0.28833288734278834, "grad_norm": 25.807090759277344, "learning_rate": 7.116671126572117e-07, "loss": 0.5867, "step": 2155 }, { "epoch": 0.2884666845062885, "grad_norm": 17.660762786865234, "learning_rate": 7.115333154937115e-07, "loss": 0.5327, "step": 2156 }, { "epoch": 0.2886004816697886, "grad_norm": 20.54017448425293, "learning_rate": 7.113995183302114e-07, "loss": 0.4295, "step": 2157 }, { "epoch": 0.2887342788332887, "grad_norm": 38.959835052490234, "learning_rate": 7.112657211667113e-07, "loss": 0.5104, "step": 2158 }, { "epoch": 0.2888680759967889, "grad_norm": 18.55978012084961, "learning_rate": 7.111319240032111e-07, "loss": 0.3646, "step": 2159 }, { "epoch": 0.289001873160289, "grad_norm": 44.30630111694336, "learning_rate": 7.109981268397109e-07, "loss": 0.2995, "step": 2160 }, { "epoch": 0.2891356703237891, "grad_norm": 23.50271224975586, "learning_rate": 7.108643296762108e-07, "loss": 0.527, "step": 2161 }, { "epoch": 0.28926946748728927, "grad_norm": 12.284360885620117, "learning_rate": 7.107305325127107e-07, "loss": 0.6053, "step": 2162 }, { "epoch": 0.2894032646507894, "grad_norm": 10.633132934570312, "learning_rate": 7.105967353492107e-07, "loss": 0.3627, "step": 2163 }, { "epoch": 0.28953706181428956, "grad_norm": 23.097339630126953, "learning_rate": 7.104629381857104e-07, "loss": 0.5317, "step": 2164 }, { "epoch": 0.28967085897778966, "grad_norm": 27.057735443115234, "learning_rate": 7.103291410222103e-07, "loss": 0.4302, "step": 2165 }, { "epoch": 0.2898046561412898, "grad_norm": 14.498098373413086, "learning_rate": 7.101953438587102e-07, "loss": 0.4969, "step": 2166 }, { "epoch": 0.28993845330478996, "grad_norm": 9.600186347961426, "learning_rate": 7.100615466952101e-07, "loss": 0.2416, "step": 2167 }, { "epoch": 0.29007225046829005, "grad_norm": 14.207609176635742, "learning_rate": 7.099277495317098e-07, "loss": 0.2846, "step": 2168 }, { "epoch": 0.2902060476317902, "grad_norm": 10.996283531188965, "learning_rate": 7.097939523682097e-07, "loss": 0.3695, "step": 2169 }, { "epoch": 0.29033984479529035, "grad_norm": 24.469688415527344, "learning_rate": 7.096601552047097e-07, "loss": 0.313, "step": 2170 }, { "epoch": 0.2904736419587905, "grad_norm": 35.141868591308594, "learning_rate": 7.095263580412095e-07, "loss": 0.6287, "step": 2171 }, { "epoch": 0.2906074391222906, "grad_norm": 10.36569881439209, "learning_rate": 7.093925608777094e-07, "loss": 0.5088, "step": 2172 }, { "epoch": 0.29074123628579074, "grad_norm": 37.99494934082031, "learning_rate": 7.092587637142092e-07, "loss": 0.5864, "step": 2173 }, { "epoch": 0.2908750334492909, "grad_norm": 10.830409049987793, "learning_rate": 7.091249665507091e-07, "loss": 0.6065, "step": 2174 }, { "epoch": 0.29100883061279104, "grad_norm": 15.935203552246094, "learning_rate": 7.089911693872089e-07, "loss": 0.4298, "step": 2175 }, { "epoch": 0.29114262777629113, "grad_norm": 10.750178337097168, "learning_rate": 7.088573722237088e-07, "loss": 0.276, "step": 2176 }, { "epoch": 0.2912764249397913, "grad_norm": 17.124174118041992, "learning_rate": 7.087235750602088e-07, "loss": 0.5082, "step": 2177 }, { "epoch": 0.2914102221032914, "grad_norm": 16.27153968811035, "learning_rate": 7.085897778967086e-07, "loss": 0.7144, "step": 2178 }, { "epoch": 0.2915440192667915, "grad_norm": 8.64734172821045, "learning_rate": 7.084559807332084e-07, "loss": 0.4399, "step": 2179 }, { "epoch": 0.29167781643029167, "grad_norm": 17.106510162353516, "learning_rate": 7.083221835697083e-07, "loss": 0.5239, "step": 2180 }, { "epoch": 0.2918116135937918, "grad_norm": 8.575572967529297, "learning_rate": 7.081883864062082e-07, "loss": 0.3575, "step": 2181 }, { "epoch": 0.29194541075729197, "grad_norm": 39.281524658203125, "learning_rate": 7.08054589242708e-07, "loss": 0.5226, "step": 2182 }, { "epoch": 0.29207920792079206, "grad_norm": 13.692716598510742, "learning_rate": 7.079207920792078e-07, "loss": 0.4378, "step": 2183 }, { "epoch": 0.2922130050842922, "grad_norm": 20.088865280151367, "learning_rate": 7.077869949157077e-07, "loss": 0.7405, "step": 2184 }, { "epoch": 0.29234680224779236, "grad_norm": 14.553046226501465, "learning_rate": 7.076531977522077e-07, "loss": 0.5924, "step": 2185 }, { "epoch": 0.2924805994112925, "grad_norm": 25.30784034729004, "learning_rate": 7.075194005887076e-07, "loss": 0.5592, "step": 2186 }, { "epoch": 0.2926143965747926, "grad_norm": 23.302854537963867, "learning_rate": 7.073856034252073e-07, "loss": 0.4779, "step": 2187 }, { "epoch": 0.29274819373829275, "grad_norm": 46.86249542236328, "learning_rate": 7.072518062617072e-07, "loss": 0.5449, "step": 2188 }, { "epoch": 0.2928819909017929, "grad_norm": 13.524872779846191, "learning_rate": 7.071180090982071e-07, "loss": 0.5496, "step": 2189 }, { "epoch": 0.293015788065293, "grad_norm": 16.24808692932129, "learning_rate": 7.06984211934707e-07, "loss": 0.4967, "step": 2190 }, { "epoch": 0.29314958522879314, "grad_norm": 27.86421775817871, "learning_rate": 7.068504147712067e-07, "loss": 0.3697, "step": 2191 }, { "epoch": 0.2932833823922933, "grad_norm": 21.9457950592041, "learning_rate": 7.067166176077067e-07, "loss": 0.4397, "step": 2192 }, { "epoch": 0.29341717955579344, "grad_norm": 21.162992477416992, "learning_rate": 7.065828204442066e-07, "loss": 0.5438, "step": 2193 }, { "epoch": 0.29355097671929353, "grad_norm": 9.49245548248291, "learning_rate": 7.064490232807065e-07, "loss": 0.6094, "step": 2194 }, { "epoch": 0.2936847738827937, "grad_norm": 19.486099243164062, "learning_rate": 7.063152261172063e-07, "loss": 0.5635, "step": 2195 }, { "epoch": 0.29381857104629383, "grad_norm": 16.183176040649414, "learning_rate": 7.061814289537061e-07, "loss": 0.3945, "step": 2196 }, { "epoch": 0.293952368209794, "grad_norm": 17.158384323120117, "learning_rate": 7.06047631790206e-07, "loss": 0.6018, "step": 2197 }, { "epoch": 0.2940861653732941, "grad_norm": 9.76070785522461, "learning_rate": 7.059138346267058e-07, "loss": 0.4092, "step": 2198 }, { "epoch": 0.2942199625367942, "grad_norm": 9.351763725280762, "learning_rate": 7.057800374632058e-07, "loss": 0.3649, "step": 2199 }, { "epoch": 0.29435375970029437, "grad_norm": 21.336498260498047, "learning_rate": 7.056462402997057e-07, "loss": 0.3502, "step": 2200 }, { "epoch": 0.29448755686379446, "grad_norm": 20.63300323486328, "learning_rate": 7.055124431362055e-07, "loss": 0.4327, "step": 2201 }, { "epoch": 0.2946213540272946, "grad_norm": 10.499664306640625, "learning_rate": 7.053786459727053e-07, "loss": 0.4534, "step": 2202 }, { "epoch": 0.29475515119079476, "grad_norm": 9.591582298278809, "learning_rate": 7.052448488092052e-07, "loss": 0.3877, "step": 2203 }, { "epoch": 0.2948889483542949, "grad_norm": 41.46502685546875, "learning_rate": 7.051110516457051e-07, "loss": 0.5259, "step": 2204 }, { "epoch": 0.295022745517795, "grad_norm": 11.63677978515625, "learning_rate": 7.049772544822049e-07, "loss": 0.373, "step": 2205 }, { "epoch": 0.29515654268129515, "grad_norm": 13.95627498626709, "learning_rate": 7.048434573187047e-07, "loss": 0.5808, "step": 2206 }, { "epoch": 0.2952903398447953, "grad_norm": 15.587401390075684, "learning_rate": 7.047096601552047e-07, "loss": 0.3592, "step": 2207 }, { "epoch": 0.29542413700829545, "grad_norm": 9.953570365905762, "learning_rate": 7.045758629917046e-07, "loss": 0.4074, "step": 2208 }, { "epoch": 0.29555793417179554, "grad_norm": 15.0835542678833, "learning_rate": 7.044420658282045e-07, "loss": 0.5256, "step": 2209 }, { "epoch": 0.2956917313352957, "grad_norm": 19.9360408782959, "learning_rate": 7.043082686647042e-07, "loss": 0.378, "step": 2210 }, { "epoch": 0.29582552849879584, "grad_norm": 16.637042999267578, "learning_rate": 7.041744715012041e-07, "loss": 0.667, "step": 2211 }, { "epoch": 0.29595932566229594, "grad_norm": 14.970292091369629, "learning_rate": 7.04040674337704e-07, "loss": 0.5778, "step": 2212 }, { "epoch": 0.2960931228257961, "grad_norm": 17.144351959228516, "learning_rate": 7.039068771742039e-07, "loss": 0.475, "step": 2213 }, { "epoch": 0.29622691998929623, "grad_norm": 19.09200096130371, "learning_rate": 7.037730800107038e-07, "loss": 0.3184, "step": 2214 }, { "epoch": 0.2963607171527964, "grad_norm": 19.138202667236328, "learning_rate": 7.036392828472036e-07, "loss": 0.6457, "step": 2215 }, { "epoch": 0.2964945143162965, "grad_norm": 17.877595901489258, "learning_rate": 7.035054856837035e-07, "loss": 0.4581, "step": 2216 }, { "epoch": 0.2966283114797966, "grad_norm": 31.663131713867188, "learning_rate": 7.033716885202034e-07, "loss": 0.596, "step": 2217 }, { "epoch": 0.2967621086432968, "grad_norm": 13.31049919128418, "learning_rate": 7.032378913567032e-07, "loss": 0.4643, "step": 2218 }, { "epoch": 0.2968959058067969, "grad_norm": 11.608640670776367, "learning_rate": 7.03104094193203e-07, "loss": 0.3963, "step": 2219 }, { "epoch": 0.297029702970297, "grad_norm": 27.89873695373535, "learning_rate": 7.029702970297029e-07, "loss": 0.7001, "step": 2220 }, { "epoch": 0.29716350013379716, "grad_norm": 27.70355987548828, "learning_rate": 7.028364998662029e-07, "loss": 0.4299, "step": 2221 }, { "epoch": 0.2972972972972973, "grad_norm": 14.345943450927734, "learning_rate": 7.027027027027027e-07, "loss": 0.3293, "step": 2222 }, { "epoch": 0.2974310944607974, "grad_norm": 11.273019790649414, "learning_rate": 7.025689055392026e-07, "loss": 0.4318, "step": 2223 }, { "epoch": 0.29756489162429756, "grad_norm": 9.364678382873535, "learning_rate": 7.024351083757024e-07, "loss": 0.3684, "step": 2224 }, { "epoch": 0.2976986887877977, "grad_norm": 11.524425506591797, "learning_rate": 7.023013112122022e-07, "loss": 0.4155, "step": 2225 }, { "epoch": 0.29783248595129785, "grad_norm": 16.045047760009766, "learning_rate": 7.021675140487021e-07, "loss": 0.4042, "step": 2226 }, { "epoch": 0.29796628311479795, "grad_norm": 20.61585807800293, "learning_rate": 7.02033716885202e-07, "loss": 0.6536, "step": 2227 }, { "epoch": 0.2981000802782981, "grad_norm": 18.059926986694336, "learning_rate": 7.018999197217019e-07, "loss": 0.5752, "step": 2228 }, { "epoch": 0.29823387744179825, "grad_norm": 31.896039962768555, "learning_rate": 7.017661225582017e-07, "loss": 0.3521, "step": 2229 }, { "epoch": 0.2983676746052984, "grad_norm": 17.975086212158203, "learning_rate": 7.016323253947016e-07, "loss": 0.6338, "step": 2230 }, { "epoch": 0.2985014717687985, "grad_norm": 47.546592712402344, "learning_rate": 7.014985282312015e-07, "loss": 0.4335, "step": 2231 }, { "epoch": 0.29863526893229864, "grad_norm": 28.201936721801758, "learning_rate": 7.013647310677014e-07, "loss": 0.3656, "step": 2232 }, { "epoch": 0.2987690660957988, "grad_norm": 34.942325592041016, "learning_rate": 7.012309339042011e-07, "loss": 0.3185, "step": 2233 }, { "epoch": 0.2989028632592989, "grad_norm": 23.65660285949707, "learning_rate": 7.01097136740701e-07, "loss": 0.4108, "step": 2234 }, { "epoch": 0.299036660422799, "grad_norm": 32.872745513916016, "learning_rate": 7.009633395772009e-07, "loss": 0.5994, "step": 2235 }, { "epoch": 0.2991704575862992, "grad_norm": 13.382479667663574, "learning_rate": 7.008295424137009e-07, "loss": 0.2965, "step": 2236 }, { "epoch": 0.2993042547497993, "grad_norm": 21.50666046142578, "learning_rate": 7.006957452502007e-07, "loss": 0.525, "step": 2237 }, { "epoch": 0.2994380519132994, "grad_norm": 24.149930953979492, "learning_rate": 7.005619480867005e-07, "loss": 0.2485, "step": 2238 }, { "epoch": 0.29957184907679957, "grad_norm": 50.68666458129883, "learning_rate": 7.004281509232004e-07, "loss": 1.0249, "step": 2239 }, { "epoch": 0.2997056462402997, "grad_norm": 31.965255737304688, "learning_rate": 7.002943537597003e-07, "loss": 0.6451, "step": 2240 }, { "epoch": 0.29983944340379987, "grad_norm": 14.158623695373535, "learning_rate": 7.001605565962001e-07, "loss": 0.439, "step": 2241 }, { "epoch": 0.29997324056729996, "grad_norm": 14.128376007080078, "learning_rate": 7.000267594327e-07, "loss": 0.5364, "step": 2242 }, { "epoch": 0.3001070377308001, "grad_norm": 15.0392427444458, "learning_rate": 6.998929622691999e-07, "loss": 0.3952, "step": 2243 }, { "epoch": 0.30024083489430026, "grad_norm": 28.43758773803711, "learning_rate": 6.997591651056998e-07, "loss": 0.32, "step": 2244 }, { "epoch": 0.30037463205780035, "grad_norm": 14.489378929138184, "learning_rate": 6.996253679421996e-07, "loss": 0.4353, "step": 2245 }, { "epoch": 0.3005084292213005, "grad_norm": 23.547687530517578, "learning_rate": 6.994915707786995e-07, "loss": 0.6917, "step": 2246 }, { "epoch": 0.30064222638480065, "grad_norm": 19.09604835510254, "learning_rate": 6.993577736151993e-07, "loss": 0.6346, "step": 2247 }, { "epoch": 0.3007760235483008, "grad_norm": 24.165267944335938, "learning_rate": 6.992239764516991e-07, "loss": 0.6778, "step": 2248 }, { "epoch": 0.3009098207118009, "grad_norm": 18.33153533935547, "learning_rate": 6.99090179288199e-07, "loss": 0.6012, "step": 2249 }, { "epoch": 0.30104361787530104, "grad_norm": 39.998870849609375, "learning_rate": 6.989563821246989e-07, "loss": 0.4742, "step": 2250 }, { "epoch": 0.3011774150388012, "grad_norm": 12.835319519042969, "learning_rate": 6.988225849611989e-07, "loss": 0.5979, "step": 2251 }, { "epoch": 0.30131121220230134, "grad_norm": 16.046733856201172, "learning_rate": 6.986887877976986e-07, "loss": 0.4048, "step": 2252 }, { "epoch": 0.30144500936580143, "grad_norm": 13.668875694274902, "learning_rate": 6.985549906341985e-07, "loss": 0.4041, "step": 2253 }, { "epoch": 0.3015788065293016, "grad_norm": 15.984082221984863, "learning_rate": 6.984211934706984e-07, "loss": 0.5604, "step": 2254 }, { "epoch": 0.30171260369280173, "grad_norm": 15.051359176635742, "learning_rate": 6.982873963071983e-07, "loss": 0.5199, "step": 2255 }, { "epoch": 0.3018464008563018, "grad_norm": 14.178597450256348, "learning_rate": 6.98153599143698e-07, "loss": 0.4286, "step": 2256 }, { "epoch": 0.30198019801980197, "grad_norm": 14.811824798583984, "learning_rate": 6.980198019801979e-07, "loss": 0.4563, "step": 2257 }, { "epoch": 0.3021139951833021, "grad_norm": 16.318483352661133, "learning_rate": 6.978860048166979e-07, "loss": 0.4765, "step": 2258 }, { "epoch": 0.30224779234680227, "grad_norm": 25.286026000976562, "learning_rate": 6.977522076531978e-07, "loss": 0.4711, "step": 2259 }, { "epoch": 0.30238158951030236, "grad_norm": 18.78377342224121, "learning_rate": 6.976184104896976e-07, "loss": 0.4451, "step": 2260 }, { "epoch": 0.3025153866738025, "grad_norm": 10.348203659057617, "learning_rate": 6.974846133261974e-07, "loss": 0.4373, "step": 2261 }, { "epoch": 0.30264918383730266, "grad_norm": 21.690231323242188, "learning_rate": 6.973508161626973e-07, "loss": 0.3779, "step": 2262 }, { "epoch": 0.3027829810008028, "grad_norm": 24.184329986572266, "learning_rate": 6.972170189991972e-07, "loss": 0.3972, "step": 2263 }, { "epoch": 0.3029167781643029, "grad_norm": 14.926597595214844, "learning_rate": 6.97083221835697e-07, "loss": 0.5231, "step": 2264 }, { "epoch": 0.30305057532780305, "grad_norm": 21.72926902770996, "learning_rate": 6.96949424672197e-07, "loss": 0.5786, "step": 2265 }, { "epoch": 0.3031843724913032, "grad_norm": 13.234630584716797, "learning_rate": 6.968156275086968e-07, "loss": 0.4556, "step": 2266 }, { "epoch": 0.3033181696548033, "grad_norm": 34.71483612060547, "learning_rate": 6.966818303451967e-07, "loss": 0.2844, "step": 2267 }, { "epoch": 0.30345196681830344, "grad_norm": 11.561365127563477, "learning_rate": 6.965480331816965e-07, "loss": 0.48, "step": 2268 }, { "epoch": 0.3035857639818036, "grad_norm": 21.704923629760742, "learning_rate": 6.964142360181964e-07, "loss": 0.5208, "step": 2269 }, { "epoch": 0.30371956114530374, "grad_norm": 15.639002799987793, "learning_rate": 6.962804388546963e-07, "loss": 0.4503, "step": 2270 }, { "epoch": 0.30385335830880383, "grad_norm": 24.169635772705078, "learning_rate": 6.961466416911961e-07, "loss": 0.3748, "step": 2271 }, { "epoch": 0.303987155472304, "grad_norm": 11.694982528686523, "learning_rate": 6.960128445276959e-07, "loss": 0.5305, "step": 2272 }, { "epoch": 0.30412095263580413, "grad_norm": 59.34484100341797, "learning_rate": 6.958790473641959e-07, "loss": 0.6948, "step": 2273 }, { "epoch": 0.3042547497993043, "grad_norm": 18.817636489868164, "learning_rate": 6.957452502006958e-07, "loss": 0.5184, "step": 2274 }, { "epoch": 0.3043885469628044, "grad_norm": 10.293685913085938, "learning_rate": 6.956114530371955e-07, "loss": 0.5334, "step": 2275 }, { "epoch": 0.3045223441263045, "grad_norm": 13.851151466369629, "learning_rate": 6.954776558736954e-07, "loss": 0.4677, "step": 2276 }, { "epoch": 0.30465614128980467, "grad_norm": 35.750484466552734, "learning_rate": 6.953438587101953e-07, "loss": 0.5968, "step": 2277 }, { "epoch": 0.30478993845330477, "grad_norm": 23.86069679260254, "learning_rate": 6.952100615466952e-07, "loss": 0.3102, "step": 2278 }, { "epoch": 0.3049237356168049, "grad_norm": 20.423725128173828, "learning_rate": 6.95076264383195e-07, "loss": 0.2846, "step": 2279 }, { "epoch": 0.30505753278030506, "grad_norm": 22.518526077270508, "learning_rate": 6.949424672196949e-07, "loss": 0.247, "step": 2280 }, { "epoch": 0.3051913299438052, "grad_norm": 26.667802810668945, "learning_rate": 6.948086700561948e-07, "loss": 0.6511, "step": 2281 }, { "epoch": 0.3053251271073053, "grad_norm": 26.727014541625977, "learning_rate": 6.946748728926947e-07, "loss": 0.3675, "step": 2282 }, { "epoch": 0.30545892427080545, "grad_norm": 26.369064331054688, "learning_rate": 6.945410757291945e-07, "loss": 0.5059, "step": 2283 }, { "epoch": 0.3055927214343056, "grad_norm": 17.814678192138672, "learning_rate": 6.944072785656944e-07, "loss": 0.3868, "step": 2284 }, { "epoch": 0.30572651859780575, "grad_norm": 56.601806640625, "learning_rate": 6.942734814021942e-07, "loss": 0.6937, "step": 2285 }, { "epoch": 0.30586031576130585, "grad_norm": 11.234055519104004, "learning_rate": 6.941396842386941e-07, "loss": 0.3033, "step": 2286 }, { "epoch": 0.305994112924806, "grad_norm": 14.841670036315918, "learning_rate": 6.94005887075194e-07, "loss": 0.3561, "step": 2287 }, { "epoch": 0.30612791008830614, "grad_norm": 29.043163299560547, "learning_rate": 6.938720899116939e-07, "loss": 0.4835, "step": 2288 }, { "epoch": 0.30626170725180624, "grad_norm": 11.735191345214844, "learning_rate": 6.937382927481937e-07, "loss": 0.3208, "step": 2289 }, { "epoch": 0.3063955044153064, "grad_norm": 11.905729293823242, "learning_rate": 6.936044955846936e-07, "loss": 0.3281, "step": 2290 }, { "epoch": 0.30652930157880653, "grad_norm": 10.301342964172363, "learning_rate": 6.934706984211934e-07, "loss": 0.4689, "step": 2291 }, { "epoch": 0.3066630987423067, "grad_norm": 35.647403717041016, "learning_rate": 6.933369012576933e-07, "loss": 0.6551, "step": 2292 }, { "epoch": 0.3067968959058068, "grad_norm": 11.085896492004395, "learning_rate": 6.932031040941932e-07, "loss": 0.312, "step": 2293 }, { "epoch": 0.3069306930693069, "grad_norm": 26.34748077392578, "learning_rate": 6.93069306930693e-07, "loss": 0.2986, "step": 2294 }, { "epoch": 0.3070644902328071, "grad_norm": 12.273345947265625, "learning_rate": 6.929355097671929e-07, "loss": 0.5508, "step": 2295 }, { "epoch": 0.3071982873963072, "grad_norm": 13.283063888549805, "learning_rate": 6.928017126036928e-07, "loss": 0.3671, "step": 2296 }, { "epoch": 0.3073320845598073, "grad_norm": 16.125423431396484, "learning_rate": 6.926679154401927e-07, "loss": 0.4227, "step": 2297 }, { "epoch": 0.30746588172330747, "grad_norm": 18.88006019592285, "learning_rate": 6.925341182766926e-07, "loss": 0.3189, "step": 2298 }, { "epoch": 0.3075996788868076, "grad_norm": 26.514314651489258, "learning_rate": 6.924003211131923e-07, "loss": 0.5136, "step": 2299 }, { "epoch": 0.3077334760503077, "grad_norm": 15.821619033813477, "learning_rate": 6.922665239496922e-07, "loss": 0.4316, "step": 2300 }, { "epoch": 0.30786727321380786, "grad_norm": 20.46954345703125, "learning_rate": 6.921327267861921e-07, "loss": 0.611, "step": 2301 }, { "epoch": 0.308001070377308, "grad_norm": 18.763914108276367, "learning_rate": 6.919989296226921e-07, "loss": 0.3941, "step": 2302 }, { "epoch": 0.30813486754080815, "grad_norm": 18.692033767700195, "learning_rate": 6.918651324591918e-07, "loss": 0.3825, "step": 2303 }, { "epoch": 0.30826866470430825, "grad_norm": 15.24742603302002, "learning_rate": 6.917313352956917e-07, "loss": 0.3936, "step": 2304 }, { "epoch": 0.3084024618678084, "grad_norm": 19.24734878540039, "learning_rate": 6.915975381321916e-07, "loss": 0.3832, "step": 2305 }, { "epoch": 0.30853625903130855, "grad_norm": 39.485897064208984, "learning_rate": 6.914637409686914e-07, "loss": 0.5982, "step": 2306 }, { "epoch": 0.3086700561948087, "grad_norm": 32.92052459716797, "learning_rate": 6.913299438051913e-07, "loss": 0.5921, "step": 2307 }, { "epoch": 0.3088038533583088, "grad_norm": 50.85581970214844, "learning_rate": 6.911961466416911e-07, "loss": 0.7402, "step": 2308 }, { "epoch": 0.30893765052180894, "grad_norm": 26.284183502197266, "learning_rate": 6.91062349478191e-07, "loss": 0.3375, "step": 2309 }, { "epoch": 0.3090714476853091, "grad_norm": 12.996792793273926, "learning_rate": 6.909285523146909e-07, "loss": 0.3012, "step": 2310 }, { "epoch": 0.3092052448488092, "grad_norm": 28.55484390258789, "learning_rate": 6.907947551511908e-07, "loss": 0.2162, "step": 2311 }, { "epoch": 0.30933904201230933, "grad_norm": 26.030336380004883, "learning_rate": 6.906609579876907e-07, "loss": 0.4911, "step": 2312 }, { "epoch": 0.3094728391758095, "grad_norm": 29.644039154052734, "learning_rate": 6.905271608241905e-07, "loss": 0.465, "step": 2313 }, { "epoch": 0.3096066363393096, "grad_norm": 39.50339889526367, "learning_rate": 6.903933636606903e-07, "loss": 0.4725, "step": 2314 }, { "epoch": 0.3097404335028097, "grad_norm": 30.551973342895508, "learning_rate": 6.902595664971902e-07, "loss": 0.5159, "step": 2315 }, { "epoch": 0.30987423066630987, "grad_norm": 16.5247745513916, "learning_rate": 6.901257693336901e-07, "loss": 0.5572, "step": 2316 }, { "epoch": 0.31000802782981, "grad_norm": 16.518537521362305, "learning_rate": 6.8999197217019e-07, "loss": 0.5546, "step": 2317 }, { "epoch": 0.31014182499331017, "grad_norm": 16.04912567138672, "learning_rate": 6.898581750066898e-07, "loss": 0.6547, "step": 2318 }, { "epoch": 0.31027562215681026, "grad_norm": 15.902009010314941, "learning_rate": 6.897243778431897e-07, "loss": 0.4278, "step": 2319 }, { "epoch": 0.3104094193203104, "grad_norm": 16.478900909423828, "learning_rate": 6.895905806796896e-07, "loss": 0.5691, "step": 2320 }, { "epoch": 0.31054321648381056, "grad_norm": 18.203643798828125, "learning_rate": 6.894567835161895e-07, "loss": 0.5281, "step": 2321 }, { "epoch": 0.31067701364731065, "grad_norm": 27.996572494506836, "learning_rate": 6.893229863526892e-07, "loss": 0.4311, "step": 2322 }, { "epoch": 0.3108108108108108, "grad_norm": 15.642548561096191, "learning_rate": 6.891891891891891e-07, "loss": 0.464, "step": 2323 }, { "epoch": 0.31094460797431095, "grad_norm": 24.793283462524414, "learning_rate": 6.890553920256891e-07, "loss": 0.5982, "step": 2324 }, { "epoch": 0.3110784051378111, "grad_norm": 35.44468688964844, "learning_rate": 6.88921594862189e-07, "loss": 0.5745, "step": 2325 }, { "epoch": 0.3112122023013112, "grad_norm": 17.779403686523438, "learning_rate": 6.887877976986888e-07, "loss": 0.4107, "step": 2326 }, { "epoch": 0.31134599946481134, "grad_norm": 11.753454208374023, "learning_rate": 6.886540005351886e-07, "loss": 0.4028, "step": 2327 }, { "epoch": 0.3114797966283115, "grad_norm": 17.279903411865234, "learning_rate": 6.885202033716885e-07, "loss": 0.2278, "step": 2328 }, { "epoch": 0.31161359379181164, "grad_norm": 22.651689529418945, "learning_rate": 6.883864062081883e-07, "loss": 0.4372, "step": 2329 }, { "epoch": 0.31174739095531173, "grad_norm": 14.813192367553711, "learning_rate": 6.882526090446882e-07, "loss": 0.4354, "step": 2330 }, { "epoch": 0.3118811881188119, "grad_norm": 24.994401931762695, "learning_rate": 6.88118811881188e-07, "loss": 0.687, "step": 2331 }, { "epoch": 0.31201498528231203, "grad_norm": 33.86027908325195, "learning_rate": 6.87985014717688e-07, "loss": 0.7602, "step": 2332 }, { "epoch": 0.3121487824458121, "grad_norm": 23.46344566345215, "learning_rate": 6.878512175541878e-07, "loss": 0.57, "step": 2333 }, { "epoch": 0.31228257960931227, "grad_norm": 11.902565002441406, "learning_rate": 6.877174203906877e-07, "loss": 0.5367, "step": 2334 }, { "epoch": 0.3124163767728124, "grad_norm": 11.423251152038574, "learning_rate": 6.875836232271876e-07, "loss": 0.4422, "step": 2335 }, { "epoch": 0.31255017393631257, "grad_norm": 11.432966232299805, "learning_rate": 6.874498260636874e-07, "loss": 0.3961, "step": 2336 }, { "epoch": 0.31268397109981266, "grad_norm": 10.409493446350098, "learning_rate": 6.873160289001872e-07, "loss": 0.4176, "step": 2337 }, { "epoch": 0.3128177682633128, "grad_norm": 9.444841384887695, "learning_rate": 6.871822317366871e-07, "loss": 0.4292, "step": 2338 }, { "epoch": 0.31295156542681296, "grad_norm": 20.306636810302734, "learning_rate": 6.870484345731871e-07, "loss": 0.4035, "step": 2339 }, { "epoch": 0.3130853625903131, "grad_norm": 19.923629760742188, "learning_rate": 6.86914637409687e-07, "loss": 0.5354, "step": 2340 }, { "epoch": 0.3132191597538132, "grad_norm": 14.549517631530762, "learning_rate": 6.867808402461867e-07, "loss": 0.5172, "step": 2341 }, { "epoch": 0.31335295691731335, "grad_norm": 16.09527587890625, "learning_rate": 6.866470430826866e-07, "loss": 0.5718, "step": 2342 }, { "epoch": 0.3134867540808135, "grad_norm": 26.044803619384766, "learning_rate": 6.865132459191865e-07, "loss": 0.4092, "step": 2343 }, { "epoch": 0.3136205512443136, "grad_norm": 42.57682800292969, "learning_rate": 6.863794487556864e-07, "loss": 0.3756, "step": 2344 }, { "epoch": 0.31375434840781374, "grad_norm": 20.716384887695312, "learning_rate": 6.862456515921861e-07, "loss": 0.3624, "step": 2345 }, { "epoch": 0.3138881455713139, "grad_norm": 34.29291915893555, "learning_rate": 6.861118544286861e-07, "loss": 0.3036, "step": 2346 }, { "epoch": 0.31402194273481404, "grad_norm": 26.37322998046875, "learning_rate": 6.85978057265186e-07, "loss": 0.4859, "step": 2347 }, { "epoch": 0.31415573989831413, "grad_norm": 62.72554016113281, "learning_rate": 6.858442601016859e-07, "loss": 0.4302, "step": 2348 }, { "epoch": 0.3142895370618143, "grad_norm": 17.39204978942871, "learning_rate": 6.857104629381857e-07, "loss": 0.2752, "step": 2349 }, { "epoch": 0.31442333422531443, "grad_norm": 16.65066146850586, "learning_rate": 6.855766657746855e-07, "loss": 0.4048, "step": 2350 }, { "epoch": 0.3145571313888146, "grad_norm": 14.294013977050781, "learning_rate": 6.854428686111854e-07, "loss": 0.4657, "step": 2351 }, { "epoch": 0.3146909285523147, "grad_norm": 29.56268882751465, "learning_rate": 6.853090714476852e-07, "loss": 0.558, "step": 2352 }, { "epoch": 0.3148247257158148, "grad_norm": 14.128198623657227, "learning_rate": 6.851752742841851e-07, "loss": 0.3257, "step": 2353 }, { "epoch": 0.314958522879315, "grad_norm": 27.592042922973633, "learning_rate": 6.85041477120685e-07, "loss": 0.5504, "step": 2354 }, { "epoch": 0.31509232004281507, "grad_norm": 14.229523658752441, "learning_rate": 6.849076799571849e-07, "loss": 0.3269, "step": 2355 }, { "epoch": 0.3152261172063152, "grad_norm": 13.639404296875, "learning_rate": 6.847738827936847e-07, "loss": 0.4748, "step": 2356 }, { "epoch": 0.31535991436981536, "grad_norm": 22.407236099243164, "learning_rate": 6.846400856301846e-07, "loss": 0.3173, "step": 2357 }, { "epoch": 0.3154937115333155, "grad_norm": 18.127151489257812, "learning_rate": 6.845062884666845e-07, "loss": 0.3733, "step": 2358 }, { "epoch": 0.3156275086968156, "grad_norm": 12.469989776611328, "learning_rate": 6.843724913031843e-07, "loss": 0.4411, "step": 2359 }, { "epoch": 0.31576130586031576, "grad_norm": 19.680967330932617, "learning_rate": 6.842386941396841e-07, "loss": 0.3095, "step": 2360 }, { "epoch": 0.3158951030238159, "grad_norm": 23.370418548583984, "learning_rate": 6.841048969761841e-07, "loss": 0.4645, "step": 2361 }, { "epoch": 0.31602890018731605, "grad_norm": 37.46030807495117, "learning_rate": 6.83971099812684e-07, "loss": 0.5313, "step": 2362 }, { "epoch": 0.31616269735081615, "grad_norm": 17.20787239074707, "learning_rate": 6.838373026491839e-07, "loss": 0.3218, "step": 2363 }, { "epoch": 0.3162964945143163, "grad_norm": 13.481287956237793, "learning_rate": 6.837035054856836e-07, "loss": 0.2428, "step": 2364 }, { "epoch": 0.31643029167781644, "grad_norm": 10.895913124084473, "learning_rate": 6.835697083221835e-07, "loss": 0.3358, "step": 2365 }, { "epoch": 0.31656408884131654, "grad_norm": 18.24452018737793, "learning_rate": 6.834359111586834e-07, "loss": 0.3529, "step": 2366 }, { "epoch": 0.3166978860048167, "grad_norm": 10.080953598022461, "learning_rate": 6.833021139951833e-07, "loss": 0.2095, "step": 2367 }, { "epoch": 0.31683168316831684, "grad_norm": 29.408924102783203, "learning_rate": 6.831683168316831e-07, "loss": 0.4462, "step": 2368 }, { "epoch": 0.316965480331817, "grad_norm": 12.234246253967285, "learning_rate": 6.83034519668183e-07, "loss": 0.3687, "step": 2369 }, { "epoch": 0.3170992774953171, "grad_norm": 34.06084060668945, "learning_rate": 6.829007225046829e-07, "loss": 0.5291, "step": 2370 }, { "epoch": 0.3172330746588172, "grad_norm": 12.06781005859375, "learning_rate": 6.827669253411828e-07, "loss": 0.2246, "step": 2371 }, { "epoch": 0.3173668718223174, "grad_norm": 22.162918090820312, "learning_rate": 6.826331281776826e-07, "loss": 0.4676, "step": 2372 }, { "epoch": 0.3175006689858175, "grad_norm": 11.49403190612793, "learning_rate": 6.824993310141824e-07, "loss": 0.1924, "step": 2373 }, { "epoch": 0.3176344661493176, "grad_norm": 36.2479133605957, "learning_rate": 6.823655338506823e-07, "loss": 0.3874, "step": 2374 }, { "epoch": 0.31776826331281777, "grad_norm": 15.185364723205566, "learning_rate": 6.822317366871822e-07, "loss": 0.3952, "step": 2375 }, { "epoch": 0.3179020604763179, "grad_norm": 19.047643661499023, "learning_rate": 6.820979395236821e-07, "loss": 0.4285, "step": 2376 }, { "epoch": 0.318035857639818, "grad_norm": 13.26754379272461, "learning_rate": 6.81964142360182e-07, "loss": 0.2416, "step": 2377 }, { "epoch": 0.31816965480331816, "grad_norm": 17.679059982299805, "learning_rate": 6.818303451966818e-07, "loss": 0.316, "step": 2378 }, { "epoch": 0.3183034519668183, "grad_norm": 40.69205856323242, "learning_rate": 6.816965480331816e-07, "loss": 0.4786, "step": 2379 }, { "epoch": 0.31843724913031846, "grad_norm": 21.23810386657715, "learning_rate": 6.815627508696815e-07, "loss": 0.1908, "step": 2380 }, { "epoch": 0.31857104629381855, "grad_norm": 34.182979583740234, "learning_rate": 6.814289537061814e-07, "loss": 0.5258, "step": 2381 }, { "epoch": 0.3187048434573187, "grad_norm": 17.68697738647461, "learning_rate": 6.812951565426812e-07, "loss": 0.2724, "step": 2382 }, { "epoch": 0.31883864062081885, "grad_norm": 21.006227493286133, "learning_rate": 6.811613593791811e-07, "loss": 0.5873, "step": 2383 }, { "epoch": 0.318972437784319, "grad_norm": 14.14207935333252, "learning_rate": 6.81027562215681e-07, "loss": 0.3345, "step": 2384 }, { "epoch": 0.3191062349478191, "grad_norm": 33.52621841430664, "learning_rate": 6.808937650521809e-07, "loss": 0.5454, "step": 2385 }, { "epoch": 0.31924003211131924, "grad_norm": 25.076072692871094, "learning_rate": 6.807599678886808e-07, "loss": 0.2648, "step": 2386 }, { "epoch": 0.3193738292748194, "grad_norm": 23.10870361328125, "learning_rate": 6.806261707251805e-07, "loss": 0.546, "step": 2387 }, { "epoch": 0.3195076264383195, "grad_norm": 44.21315383911133, "learning_rate": 6.804923735616804e-07, "loss": 0.5638, "step": 2388 }, { "epoch": 0.31964142360181963, "grad_norm": 13.496397972106934, "learning_rate": 6.803585763981803e-07, "loss": 0.2913, "step": 2389 }, { "epoch": 0.3197752207653198, "grad_norm": 19.42118263244629, "learning_rate": 6.802247792346803e-07, "loss": 0.1764, "step": 2390 }, { "epoch": 0.3199090179288199, "grad_norm": 18.788368225097656, "learning_rate": 6.800909820711801e-07, "loss": 0.4287, "step": 2391 }, { "epoch": 0.32004281509232, "grad_norm": 32.94389724731445, "learning_rate": 6.799571849076799e-07, "loss": 0.4463, "step": 2392 }, { "epoch": 0.32017661225582017, "grad_norm": 23.445011138916016, "learning_rate": 6.798233877441798e-07, "loss": 0.3557, "step": 2393 }, { "epoch": 0.3203104094193203, "grad_norm": 11.16135025024414, "learning_rate": 6.796895905806797e-07, "loss": 0.2389, "step": 2394 }, { "epoch": 0.32044420658282047, "grad_norm": 26.64107322692871, "learning_rate": 6.795557934171795e-07, "loss": 0.5485, "step": 2395 }, { "epoch": 0.32057800374632056, "grad_norm": 31.592823028564453, "learning_rate": 6.794219962536793e-07, "loss": 0.5931, "step": 2396 }, { "epoch": 0.3207118009098207, "grad_norm": 32.89146041870117, "learning_rate": 6.792881990901792e-07, "loss": 0.3927, "step": 2397 }, { "epoch": 0.32084559807332086, "grad_norm": 20.805021286010742, "learning_rate": 6.791544019266792e-07, "loss": 0.5289, "step": 2398 }, { "epoch": 0.32097939523682095, "grad_norm": 35.82832717895508, "learning_rate": 6.79020604763179e-07, "loss": 0.3761, "step": 2399 }, { "epoch": 0.3211131924003211, "grad_norm": 21.695287704467773, "learning_rate": 6.788868075996789e-07, "loss": 0.411, "step": 2400 }, { "epoch": 0.32124698956382125, "grad_norm": 26.295530319213867, "learning_rate": 6.787530104361787e-07, "loss": 0.4973, "step": 2401 }, { "epoch": 0.3213807867273214, "grad_norm": 33.19816589355469, "learning_rate": 6.786192132726786e-07, "loss": 0.2211, "step": 2402 }, { "epoch": 0.3215145838908215, "grad_norm": 36.54378128051758, "learning_rate": 6.784854161091784e-07, "loss": 0.3914, "step": 2403 }, { "epoch": 0.32164838105432164, "grad_norm": 25.153425216674805, "learning_rate": 6.783516189456783e-07, "loss": 0.4442, "step": 2404 }, { "epoch": 0.3217821782178218, "grad_norm": 39.79047393798828, "learning_rate": 6.782178217821783e-07, "loss": 0.4745, "step": 2405 }, { "epoch": 0.32191597538132194, "grad_norm": 19.74188995361328, "learning_rate": 6.78084024618678e-07, "loss": 0.3603, "step": 2406 }, { "epoch": 0.32204977254482203, "grad_norm": 18.890583038330078, "learning_rate": 6.779502274551779e-07, "loss": 0.5062, "step": 2407 }, { "epoch": 0.3221835697083222, "grad_norm": 16.32769012451172, "learning_rate": 6.778164302916778e-07, "loss": 0.5198, "step": 2408 }, { "epoch": 0.32231736687182233, "grad_norm": 44.342586517333984, "learning_rate": 6.776826331281777e-07, "loss": 0.5773, "step": 2409 }, { "epoch": 0.3224511640353224, "grad_norm": 37.25600814819336, "learning_rate": 6.775488359646774e-07, "loss": 0.4078, "step": 2410 }, { "epoch": 0.3225849611988226, "grad_norm": 20.603374481201172, "learning_rate": 6.774150388011773e-07, "loss": 0.484, "step": 2411 }, { "epoch": 0.3227187583623227, "grad_norm": 31.684595108032227, "learning_rate": 6.772812416376773e-07, "loss": 0.4151, "step": 2412 }, { "epoch": 0.32285255552582287, "grad_norm": 15.628280639648438, "learning_rate": 6.771474444741772e-07, "loss": 0.4552, "step": 2413 }, { "epoch": 0.32298635268932296, "grad_norm": 24.34259605407715, "learning_rate": 6.77013647310677e-07, "loss": 0.6426, "step": 2414 }, { "epoch": 0.3231201498528231, "grad_norm": 21.632080078125, "learning_rate": 6.768798501471768e-07, "loss": 0.3039, "step": 2415 }, { "epoch": 0.32325394701632326, "grad_norm": 32.88502502441406, "learning_rate": 6.767460529836767e-07, "loss": 0.6894, "step": 2416 }, { "epoch": 0.3233877441798234, "grad_norm": 19.640554428100586, "learning_rate": 6.766122558201766e-07, "loss": 0.5975, "step": 2417 }, { "epoch": 0.3235215413433235, "grad_norm": 39.280662536621094, "learning_rate": 6.764784586566764e-07, "loss": 0.6917, "step": 2418 }, { "epoch": 0.32365533850682365, "grad_norm": 13.8489408493042, "learning_rate": 6.763446614931763e-07, "loss": 0.4754, "step": 2419 }, { "epoch": 0.3237891356703238, "grad_norm": 20.775407791137695, "learning_rate": 6.762108643296762e-07, "loss": 0.3122, "step": 2420 }, { "epoch": 0.3239229328338239, "grad_norm": 22.262475967407227, "learning_rate": 6.760770671661761e-07, "loss": 0.519, "step": 2421 }, { "epoch": 0.32405672999732404, "grad_norm": 24.011404037475586, "learning_rate": 6.759432700026759e-07, "loss": 0.4826, "step": 2422 }, { "epoch": 0.3241905271608242, "grad_norm": 19.89497947692871, "learning_rate": 6.758094728391758e-07, "loss": 0.5669, "step": 2423 }, { "epoch": 0.32432432432432434, "grad_norm": 15.204484939575195, "learning_rate": 6.756756756756756e-07, "loss": 0.5699, "step": 2424 }, { "epoch": 0.32445812148782444, "grad_norm": 11.72768497467041, "learning_rate": 6.755418785121755e-07, "loss": 0.3661, "step": 2425 }, { "epoch": 0.3245919186513246, "grad_norm": 15.019089698791504, "learning_rate": 6.754080813486753e-07, "loss": 0.3511, "step": 2426 }, { "epoch": 0.32472571581482473, "grad_norm": 14.733039855957031, "learning_rate": 6.752742841851753e-07, "loss": 0.4706, "step": 2427 }, { "epoch": 0.3248595129783249, "grad_norm": 37.7657356262207, "learning_rate": 6.751404870216752e-07, "loss": 0.6391, "step": 2428 }, { "epoch": 0.324993310141825, "grad_norm": 27.72810935974121, "learning_rate": 6.75006689858175e-07, "loss": 0.5212, "step": 2429 }, { "epoch": 0.3251271073053251, "grad_norm": 25.001123428344727, "learning_rate": 6.748728926946748e-07, "loss": 0.3467, "step": 2430 }, { "epoch": 0.3252609044688253, "grad_norm": 25.821744918823242, "learning_rate": 6.747390955311747e-07, "loss": 0.6241, "step": 2431 }, { "epoch": 0.32539470163232537, "grad_norm": 20.443021774291992, "learning_rate": 6.746052983676746e-07, "loss": 0.4652, "step": 2432 }, { "epoch": 0.3255284987958255, "grad_norm": 15.039031028747559, "learning_rate": 6.744715012041743e-07, "loss": 0.4233, "step": 2433 }, { "epoch": 0.32566229595932566, "grad_norm": 23.688899993896484, "learning_rate": 6.743377040406743e-07, "loss": 0.5028, "step": 2434 }, { "epoch": 0.3257960931228258, "grad_norm": 22.200077056884766, "learning_rate": 6.742039068771742e-07, "loss": 0.5353, "step": 2435 }, { "epoch": 0.3259298902863259, "grad_norm": 12.01025104522705, "learning_rate": 6.740701097136741e-07, "loss": 0.4144, "step": 2436 }, { "epoch": 0.32606368744982606, "grad_norm": 19.344905853271484, "learning_rate": 6.739363125501739e-07, "loss": 0.3746, "step": 2437 }, { "epoch": 0.3261974846133262, "grad_norm": 13.204686164855957, "learning_rate": 6.738025153866737e-07, "loss": 0.5795, "step": 2438 }, { "epoch": 0.32633128177682635, "grad_norm": 20.539058685302734, "learning_rate": 6.736687182231736e-07, "loss": 0.3455, "step": 2439 }, { "epoch": 0.32646507894032645, "grad_norm": 13.792458534240723, "learning_rate": 6.735349210596735e-07, "loss": 0.3634, "step": 2440 }, { "epoch": 0.3265988761038266, "grad_norm": 23.289993286132812, "learning_rate": 6.734011238961733e-07, "loss": 0.5417, "step": 2441 }, { "epoch": 0.32673267326732675, "grad_norm": 22.892955780029297, "learning_rate": 6.732673267326733e-07, "loss": 0.3339, "step": 2442 }, { "epoch": 0.32686647043082684, "grad_norm": 20.50817108154297, "learning_rate": 6.731335295691731e-07, "loss": 0.5872, "step": 2443 }, { "epoch": 0.327000267594327, "grad_norm": 24.383560180664062, "learning_rate": 6.72999732405673e-07, "loss": 0.7451, "step": 2444 }, { "epoch": 0.32713406475782714, "grad_norm": 15.978832244873047, "learning_rate": 6.728659352421728e-07, "loss": 0.5815, "step": 2445 }, { "epoch": 0.3272678619213273, "grad_norm": 34.919891357421875, "learning_rate": 6.727321380786727e-07, "loss": 0.3323, "step": 2446 }, { "epoch": 0.3274016590848274, "grad_norm": 15.528693199157715, "learning_rate": 6.725983409151726e-07, "loss": 0.3725, "step": 2447 }, { "epoch": 0.32753545624832753, "grad_norm": 25.75053596496582, "learning_rate": 6.724645437516724e-07, "loss": 0.4843, "step": 2448 }, { "epoch": 0.3276692534118277, "grad_norm": 14.175487518310547, "learning_rate": 6.723307465881723e-07, "loss": 0.4785, "step": 2449 }, { "epoch": 0.3278030505753278, "grad_norm": 32.87546920776367, "learning_rate": 6.721969494246722e-07, "loss": 0.4154, "step": 2450 }, { "epoch": 0.3279368477388279, "grad_norm": 28.692005157470703, "learning_rate": 6.720631522611721e-07, "loss": 0.394, "step": 2451 }, { "epoch": 0.32807064490232807, "grad_norm": 17.47056007385254, "learning_rate": 6.71929355097672e-07, "loss": 0.4906, "step": 2452 }, { "epoch": 0.3282044420658282, "grad_norm": 13.552511215209961, "learning_rate": 6.717955579341717e-07, "loss": 0.4248, "step": 2453 }, { "epoch": 0.32833823922932837, "grad_norm": 11.35498046875, "learning_rate": 6.716617607706716e-07, "loss": 0.4577, "step": 2454 }, { "epoch": 0.32847203639282846, "grad_norm": 16.189109802246094, "learning_rate": 6.715279636071715e-07, "loss": 0.3586, "step": 2455 }, { "epoch": 0.3286058335563286, "grad_norm": 13.340205192565918, "learning_rate": 6.713941664436715e-07, "loss": 0.4064, "step": 2456 }, { "epoch": 0.32873963071982876, "grad_norm": 22.635034561157227, "learning_rate": 6.712603692801712e-07, "loss": 0.4423, "step": 2457 }, { "epoch": 0.32887342788332885, "grad_norm": 28.862882614135742, "learning_rate": 6.711265721166711e-07, "loss": 0.5611, "step": 2458 }, { "epoch": 0.329007225046829, "grad_norm": 28.95750617980957, "learning_rate": 6.70992774953171e-07, "loss": 0.3597, "step": 2459 }, { "epoch": 0.32914102221032915, "grad_norm": 24.1147403717041, "learning_rate": 6.708589777896708e-07, "loss": 0.538, "step": 2460 }, { "epoch": 0.3292748193738293, "grad_norm": 27.171287536621094, "learning_rate": 6.707251806261707e-07, "loss": 0.3803, "step": 2461 }, { "epoch": 0.3294086165373294, "grad_norm": 17.658374786376953, "learning_rate": 6.705913834626705e-07, "loss": 0.5435, "step": 2462 }, { "epoch": 0.32954241370082954, "grad_norm": 18.721683502197266, "learning_rate": 6.704575862991704e-07, "loss": 0.4244, "step": 2463 }, { "epoch": 0.3296762108643297, "grad_norm": 13.519707679748535, "learning_rate": 6.703237891356703e-07, "loss": 0.2865, "step": 2464 }, { "epoch": 0.32981000802782984, "grad_norm": 35.236846923828125, "learning_rate": 6.701899919721702e-07, "loss": 0.2402, "step": 2465 }, { "epoch": 0.32994380519132993, "grad_norm": 30.77900505065918, "learning_rate": 6.7005619480867e-07, "loss": 0.4678, "step": 2466 }, { "epoch": 0.3300776023548301, "grad_norm": 38.14805221557617, "learning_rate": 6.699223976451699e-07, "loss": 0.5137, "step": 2467 }, { "epoch": 0.33021139951833023, "grad_norm": 10.040067672729492, "learning_rate": 6.697886004816697e-07, "loss": 0.3291, "step": 2468 }, { "epoch": 0.3303451966818303, "grad_norm": 21.216655731201172, "learning_rate": 6.696548033181696e-07, "loss": 0.5104, "step": 2469 }, { "epoch": 0.33047899384533047, "grad_norm": 16.454265594482422, "learning_rate": 6.695210061546695e-07, "loss": 0.3192, "step": 2470 }, { "epoch": 0.3306127910088306, "grad_norm": 28.91097068786621, "learning_rate": 6.693872089911694e-07, "loss": 0.458, "step": 2471 }, { "epoch": 0.33074658817233077, "grad_norm": 70.66761779785156, "learning_rate": 6.692534118276692e-07, "loss": 0.5353, "step": 2472 }, { "epoch": 0.33088038533583086, "grad_norm": 19.37079620361328, "learning_rate": 6.691196146641691e-07, "loss": 0.4775, "step": 2473 }, { "epoch": 0.331014182499331, "grad_norm": 14.3737211227417, "learning_rate": 6.68985817500669e-07, "loss": 0.4713, "step": 2474 }, { "epoch": 0.33114797966283116, "grad_norm": 19.038320541381836, "learning_rate": 6.688520203371689e-07, "loss": 0.3757, "step": 2475 }, { "epoch": 0.3312817768263313, "grad_norm": 21.292797088623047, "learning_rate": 6.687182231736686e-07, "loss": 0.5884, "step": 2476 }, { "epoch": 0.3314155739898314, "grad_norm": 25.68630027770996, "learning_rate": 6.685844260101685e-07, "loss": 0.509, "step": 2477 }, { "epoch": 0.33154937115333155, "grad_norm": 25.519760131835938, "learning_rate": 6.684506288466685e-07, "loss": 0.3504, "step": 2478 }, { "epoch": 0.3316831683168317, "grad_norm": 17.49401092529297, "learning_rate": 6.683168316831684e-07, "loss": 0.4796, "step": 2479 }, { "epoch": 0.3318169654803318, "grad_norm": 28.467187881469727, "learning_rate": 6.681830345196681e-07, "loss": 0.4242, "step": 2480 }, { "epoch": 0.33195076264383194, "grad_norm": 52.26398468017578, "learning_rate": 6.68049237356168e-07, "loss": 0.4248, "step": 2481 }, { "epoch": 0.3320845598073321, "grad_norm": 11.622859954833984, "learning_rate": 6.679154401926679e-07, "loss": 0.4897, "step": 2482 }, { "epoch": 0.33221835697083224, "grad_norm": 37.474483489990234, "learning_rate": 6.677816430291677e-07, "loss": 0.5796, "step": 2483 }, { "epoch": 0.33235215413433233, "grad_norm": 47.723243713378906, "learning_rate": 6.676478458656676e-07, "loss": 0.4424, "step": 2484 }, { "epoch": 0.3324859512978325, "grad_norm": 38.364009857177734, "learning_rate": 6.675140487021674e-07, "loss": 0.6962, "step": 2485 }, { "epoch": 0.33261974846133263, "grad_norm": 24.935142517089844, "learning_rate": 6.673802515386674e-07, "loss": 0.5812, "step": 2486 }, { "epoch": 0.3327535456248328, "grad_norm": 20.525949478149414, "learning_rate": 6.672464543751672e-07, "loss": 0.2865, "step": 2487 }, { "epoch": 0.3328873427883329, "grad_norm": 40.39585494995117, "learning_rate": 6.671126572116671e-07, "loss": 0.435, "step": 2488 }, { "epoch": 0.333021139951833, "grad_norm": 46.32539749145508, "learning_rate": 6.66978860048167e-07, "loss": 0.462, "step": 2489 }, { "epoch": 0.33315493711533317, "grad_norm": 15.431361198425293, "learning_rate": 6.668450628846668e-07, "loss": 0.3751, "step": 2490 }, { "epoch": 0.33328873427883327, "grad_norm": 14.310503959655762, "learning_rate": 6.667112657211666e-07, "loss": 0.3664, "step": 2491 }, { "epoch": 0.3334225314423334, "grad_norm": 22.84663200378418, "learning_rate": 6.665774685576665e-07, "loss": 0.603, "step": 2492 }, { "epoch": 0.33355632860583356, "grad_norm": 28.227821350097656, "learning_rate": 6.664436713941665e-07, "loss": 0.3221, "step": 2493 }, { "epoch": 0.3336901257693337, "grad_norm": 32.19366455078125, "learning_rate": 6.663098742306663e-07, "loss": 0.3644, "step": 2494 }, { "epoch": 0.3338239229328338, "grad_norm": 18.164682388305664, "learning_rate": 6.661760770671661e-07, "loss": 0.3179, "step": 2495 }, { "epoch": 0.33395772009633395, "grad_norm": 11.664417266845703, "learning_rate": 6.66042279903666e-07, "loss": 0.1539, "step": 2496 }, { "epoch": 0.3340915172598341, "grad_norm": 28.837417602539062, "learning_rate": 6.659084827401659e-07, "loss": 0.584, "step": 2497 }, { "epoch": 0.33422531442333425, "grad_norm": 14.26034164428711, "learning_rate": 6.657746855766658e-07, "loss": 0.3842, "step": 2498 }, { "epoch": 0.33435911158683435, "grad_norm": 18.10724639892578, "learning_rate": 6.656408884131655e-07, "loss": 0.4555, "step": 2499 }, { "epoch": 0.3344929087503345, "grad_norm": 37.688392639160156, "learning_rate": 6.655070912496655e-07, "loss": 0.7835, "step": 2500 }, { "epoch": 0.33462670591383464, "grad_norm": 20.77794075012207, "learning_rate": 6.653732940861654e-07, "loss": 0.534, "step": 2501 }, { "epoch": 0.33476050307733474, "grad_norm": 20.253660202026367, "learning_rate": 6.652394969226653e-07, "loss": 0.4545, "step": 2502 }, { "epoch": 0.3348943002408349, "grad_norm": 31.179622650146484, "learning_rate": 6.65105699759165e-07, "loss": 0.3028, "step": 2503 }, { "epoch": 0.33502809740433503, "grad_norm": 13.645355224609375, "learning_rate": 6.649719025956649e-07, "loss": 0.4099, "step": 2504 }, { "epoch": 0.3351618945678352, "grad_norm": 18.017040252685547, "learning_rate": 6.648381054321648e-07, "loss": 0.5424, "step": 2505 }, { "epoch": 0.3352956917313353, "grad_norm": 17.880096435546875, "learning_rate": 6.647043082686647e-07, "loss": 0.5259, "step": 2506 }, { "epoch": 0.3354294888948354, "grad_norm": 15.283754348754883, "learning_rate": 6.645705111051645e-07, "loss": 0.4131, "step": 2507 }, { "epoch": 0.3355632860583356, "grad_norm": 31.106599807739258, "learning_rate": 6.644367139416644e-07, "loss": 0.4231, "step": 2508 }, { "epoch": 0.3356970832218357, "grad_norm": 13.967517852783203, "learning_rate": 6.643029167781643e-07, "loss": 0.3762, "step": 2509 }, { "epoch": 0.3358308803853358, "grad_norm": 10.201699256896973, "learning_rate": 6.641691196146641e-07, "loss": 0.3936, "step": 2510 }, { "epoch": 0.33596467754883597, "grad_norm": 16.529380798339844, "learning_rate": 6.64035322451164e-07, "loss": 0.4224, "step": 2511 }, { "epoch": 0.3360984747123361, "grad_norm": 13.015746116638184, "learning_rate": 6.639015252876639e-07, "loss": 0.5275, "step": 2512 }, { "epoch": 0.3362322718758362, "grad_norm": 26.938623428344727, "learning_rate": 6.637677281241637e-07, "loss": 0.714, "step": 2513 }, { "epoch": 0.33636606903933636, "grad_norm": 39.603736877441406, "learning_rate": 6.636339309606635e-07, "loss": 0.3004, "step": 2514 }, { "epoch": 0.3364998662028365, "grad_norm": 15.891336441040039, "learning_rate": 6.635001337971635e-07, "loss": 0.6247, "step": 2515 }, { "epoch": 0.33663366336633666, "grad_norm": 15.846223831176758, "learning_rate": 6.633663366336634e-07, "loss": 0.435, "step": 2516 }, { "epoch": 0.33676746052983675, "grad_norm": 24.684972763061523, "learning_rate": 6.632325394701633e-07, "loss": 0.331, "step": 2517 }, { "epoch": 0.3369012576933369, "grad_norm": 16.637264251708984, "learning_rate": 6.63098742306663e-07, "loss": 0.6636, "step": 2518 }, { "epoch": 0.33703505485683705, "grad_norm": 28.64834213256836, "learning_rate": 6.629649451431629e-07, "loss": 0.6423, "step": 2519 }, { "epoch": 0.3371688520203372, "grad_norm": 24.000328063964844, "learning_rate": 6.628311479796628e-07, "loss": 0.2918, "step": 2520 }, { "epoch": 0.3373026491838373, "grad_norm": 18.8385066986084, "learning_rate": 6.626973508161627e-07, "loss": 0.4465, "step": 2521 }, { "epoch": 0.33743644634733744, "grad_norm": 10.964734077453613, "learning_rate": 6.625635536526625e-07, "loss": 0.3965, "step": 2522 }, { "epoch": 0.3375702435108376, "grad_norm": 25.067245483398438, "learning_rate": 6.624297564891624e-07, "loss": 0.3981, "step": 2523 }, { "epoch": 0.3377040406743377, "grad_norm": 17.179607391357422, "learning_rate": 6.622959593256623e-07, "loss": 0.5563, "step": 2524 }, { "epoch": 0.33783783783783783, "grad_norm": 28.711801528930664, "learning_rate": 6.621621621621622e-07, "loss": 0.3894, "step": 2525 }, { "epoch": 0.337971635001338, "grad_norm": 35.87749481201172, "learning_rate": 6.62028364998662e-07, "loss": 0.5035, "step": 2526 }, { "epoch": 0.3381054321648381, "grad_norm": 17.836139678955078, "learning_rate": 6.618945678351618e-07, "loss": 0.2366, "step": 2527 }, { "epoch": 0.3382392293283382, "grad_norm": 11.341130256652832, "learning_rate": 6.617607706716617e-07, "loss": 0.4257, "step": 2528 }, { "epoch": 0.33837302649183837, "grad_norm": 55.82463455200195, "learning_rate": 6.616269735081616e-07, "loss": 0.7648, "step": 2529 }, { "epoch": 0.3385068236553385, "grad_norm": 23.94535255432129, "learning_rate": 6.614931763446615e-07, "loss": 0.412, "step": 2530 }, { "epoch": 0.33864062081883867, "grad_norm": 31.913738250732422, "learning_rate": 6.613593791811614e-07, "loss": 0.5515, "step": 2531 }, { "epoch": 0.33877441798233876, "grad_norm": 16.436059951782227, "learning_rate": 6.612255820176612e-07, "loss": 0.4059, "step": 2532 }, { "epoch": 0.3389082151458389, "grad_norm": 13.506627082824707, "learning_rate": 6.610917848541611e-07, "loss": 0.4189, "step": 2533 }, { "epoch": 0.33904201230933906, "grad_norm": 9.770218849182129, "learning_rate": 6.609579876906609e-07, "loss": 0.3812, "step": 2534 }, { "epoch": 0.33917580947283915, "grad_norm": 10.90633487701416, "learning_rate": 6.608241905271608e-07, "loss": 0.3682, "step": 2535 }, { "epoch": 0.3393096066363393, "grad_norm": 14.182875633239746, "learning_rate": 6.606903933636606e-07, "loss": 0.5386, "step": 2536 }, { "epoch": 0.33944340379983945, "grad_norm": 24.454906463623047, "learning_rate": 6.605565962001606e-07, "loss": 0.4118, "step": 2537 }, { "epoch": 0.3395772009633396, "grad_norm": 19.507844924926758, "learning_rate": 6.604227990366604e-07, "loss": 0.5581, "step": 2538 }, { "epoch": 0.3397109981268397, "grad_norm": 14.450878143310547, "learning_rate": 6.602890018731603e-07, "loss": 0.4593, "step": 2539 }, { "epoch": 0.33984479529033984, "grad_norm": 12.01400375366211, "learning_rate": 6.601552047096602e-07, "loss": 0.4106, "step": 2540 }, { "epoch": 0.33997859245384, "grad_norm": 17.617136001586914, "learning_rate": 6.600214075461599e-07, "loss": 0.516, "step": 2541 }, { "epoch": 0.34011238961734014, "grad_norm": 18.46922492980957, "learning_rate": 6.598876103826598e-07, "loss": 0.7061, "step": 2542 }, { "epoch": 0.34024618678084023, "grad_norm": 18.498998641967773, "learning_rate": 6.597538132191597e-07, "loss": 0.4325, "step": 2543 }, { "epoch": 0.3403799839443404, "grad_norm": 34.273094177246094, "learning_rate": 6.596200160556597e-07, "loss": 0.4344, "step": 2544 }, { "epoch": 0.34051378110784053, "grad_norm": 33.1812744140625, "learning_rate": 6.594862188921595e-07, "loss": 0.4161, "step": 2545 }, { "epoch": 0.3406475782713406, "grad_norm": 27.876544952392578, "learning_rate": 6.593524217286593e-07, "loss": 0.3978, "step": 2546 }, { "epoch": 0.34078137543484077, "grad_norm": 26.90717124938965, "learning_rate": 6.592186245651592e-07, "loss": 0.4504, "step": 2547 }, { "epoch": 0.3409151725983409, "grad_norm": 15.258341789245605, "learning_rate": 6.590848274016591e-07, "loss": 0.4311, "step": 2548 }, { "epoch": 0.34104896976184107, "grad_norm": 14.050813674926758, "learning_rate": 6.589510302381589e-07, "loss": 0.421, "step": 2549 }, { "epoch": 0.34118276692534116, "grad_norm": 26.592561721801758, "learning_rate": 6.588172330746587e-07, "loss": 0.2436, "step": 2550 }, { "epoch": 0.3413165640888413, "grad_norm": 21.69256591796875, "learning_rate": 6.586834359111586e-07, "loss": 0.3831, "step": 2551 }, { "epoch": 0.34145036125234146, "grad_norm": 10.918766975402832, "learning_rate": 6.585496387476586e-07, "loss": 0.475, "step": 2552 }, { "epoch": 0.3415841584158416, "grad_norm": 27.10672950744629, "learning_rate": 6.584158415841584e-07, "loss": 0.6096, "step": 2553 }, { "epoch": 0.3417179555793417, "grad_norm": 13.279908180236816, "learning_rate": 6.582820444206583e-07, "loss": 0.4187, "step": 2554 }, { "epoch": 0.34185175274284185, "grad_norm": 25.63826560974121, "learning_rate": 6.581482472571581e-07, "loss": 0.3565, "step": 2555 }, { "epoch": 0.341985549906342, "grad_norm": 8.636947631835938, "learning_rate": 6.58014450093658e-07, "loss": 0.3741, "step": 2556 }, { "epoch": 0.3421193470698421, "grad_norm": 13.831912994384766, "learning_rate": 6.578806529301578e-07, "loss": 0.3801, "step": 2557 }, { "epoch": 0.34225314423334224, "grad_norm": 34.14496994018555, "learning_rate": 6.577468557666577e-07, "loss": 0.6489, "step": 2558 }, { "epoch": 0.3423869413968424, "grad_norm": 8.062405586242676, "learning_rate": 6.576130586031577e-07, "loss": 0.3726, "step": 2559 }, { "epoch": 0.34252073856034254, "grad_norm": 13.38794994354248, "learning_rate": 6.574792614396575e-07, "loss": 0.4475, "step": 2560 }, { "epoch": 0.34265453572384263, "grad_norm": 7.30664587020874, "learning_rate": 6.573454642761573e-07, "loss": 0.2403, "step": 2561 }, { "epoch": 0.3427883328873428, "grad_norm": 44.855499267578125, "learning_rate": 6.572116671126572e-07, "loss": 0.6499, "step": 2562 }, { "epoch": 0.34292213005084293, "grad_norm": 20.397808074951172, "learning_rate": 6.570778699491571e-07, "loss": 0.314, "step": 2563 }, { "epoch": 0.3430559272143431, "grad_norm": 19.703161239624023, "learning_rate": 6.569440727856568e-07, "loss": 0.3622, "step": 2564 }, { "epoch": 0.3431897243778432, "grad_norm": 12.872305870056152, "learning_rate": 6.568102756221567e-07, "loss": 0.3827, "step": 2565 }, { "epoch": 0.3433235215413433, "grad_norm": 44.727455139160156, "learning_rate": 6.566764784586567e-07, "loss": 0.2661, "step": 2566 }, { "epoch": 0.3434573187048435, "grad_norm": 18.093040466308594, "learning_rate": 6.565426812951566e-07, "loss": 0.4367, "step": 2567 }, { "epoch": 0.34359111586834357, "grad_norm": 21.51531219482422, "learning_rate": 6.564088841316564e-07, "loss": 0.5302, "step": 2568 }, { "epoch": 0.3437249130318437, "grad_norm": 13.685166358947754, "learning_rate": 6.562750869681562e-07, "loss": 0.3621, "step": 2569 }, { "epoch": 0.34385871019534386, "grad_norm": 23.848482131958008, "learning_rate": 6.561412898046561e-07, "loss": 0.5067, "step": 2570 }, { "epoch": 0.343992507358844, "grad_norm": 29.025598526000977, "learning_rate": 6.56007492641156e-07, "loss": 0.664, "step": 2571 }, { "epoch": 0.3441263045223441, "grad_norm": 18.537303924560547, "learning_rate": 6.558736954776558e-07, "loss": 0.4595, "step": 2572 }, { "epoch": 0.34426010168584426, "grad_norm": 15.619593620300293, "learning_rate": 6.557398983141556e-07, "loss": 0.601, "step": 2573 }, { "epoch": 0.3443938988493444, "grad_norm": 17.260116577148438, "learning_rate": 6.556061011506556e-07, "loss": 0.6119, "step": 2574 }, { "epoch": 0.34452769601284455, "grad_norm": 12.985549926757812, "learning_rate": 6.554723039871555e-07, "loss": 0.5792, "step": 2575 }, { "epoch": 0.34466149317634465, "grad_norm": 15.122232437133789, "learning_rate": 6.553385068236553e-07, "loss": 0.4131, "step": 2576 }, { "epoch": 0.3447952903398448, "grad_norm": 17.0864200592041, "learning_rate": 6.552047096601552e-07, "loss": 0.3589, "step": 2577 }, { "epoch": 0.34492908750334494, "grad_norm": 17.394800186157227, "learning_rate": 6.55070912496655e-07, "loss": 0.3606, "step": 2578 }, { "epoch": 0.34506288466684504, "grad_norm": 22.646024703979492, "learning_rate": 6.549371153331549e-07, "loss": 0.6647, "step": 2579 }, { "epoch": 0.3451966818303452, "grad_norm": 17.210433959960938, "learning_rate": 6.548033181696547e-07, "loss": 0.6784, "step": 2580 }, { "epoch": 0.34533047899384534, "grad_norm": 29.635759353637695, "learning_rate": 6.546695210061547e-07, "loss": 0.487, "step": 2581 }, { "epoch": 0.3454642761573455, "grad_norm": 32.341976165771484, "learning_rate": 6.545357238426546e-07, "loss": 0.525, "step": 2582 }, { "epoch": 0.3455980733208456, "grad_norm": 33.947509765625, "learning_rate": 6.544019266791544e-07, "loss": 0.5531, "step": 2583 }, { "epoch": 0.3457318704843457, "grad_norm": 13.591143608093262, "learning_rate": 6.542681295156542e-07, "loss": 0.455, "step": 2584 }, { "epoch": 0.3458656676478459, "grad_norm": 13.292569160461426, "learning_rate": 6.541343323521541e-07, "loss": 0.4482, "step": 2585 }, { "epoch": 0.345999464811346, "grad_norm": 18.130733489990234, "learning_rate": 6.54000535188654e-07, "loss": 0.3981, "step": 2586 }, { "epoch": 0.3461332619748461, "grad_norm": 14.518205642700195, "learning_rate": 6.538667380251537e-07, "loss": 0.4646, "step": 2587 }, { "epoch": 0.34626705913834627, "grad_norm": 15.296756744384766, "learning_rate": 6.537329408616536e-07, "loss": 0.6347, "step": 2588 }, { "epoch": 0.3464008563018464, "grad_norm": 8.330201148986816, "learning_rate": 6.535991436981536e-07, "loss": 0.2448, "step": 2589 }, { "epoch": 0.3465346534653465, "grad_norm": 34.973018646240234, "learning_rate": 6.534653465346535e-07, "loss": 0.5652, "step": 2590 }, { "epoch": 0.34666845062884666, "grad_norm": 13.829872131347656, "learning_rate": 6.533315493711533e-07, "loss": 0.4094, "step": 2591 }, { "epoch": 0.3468022477923468, "grad_norm": 15.85670280456543, "learning_rate": 6.531977522076531e-07, "loss": 0.4504, "step": 2592 }, { "epoch": 0.34693604495584696, "grad_norm": 24.843751907348633, "learning_rate": 6.53063955044153e-07, "loss": 0.3724, "step": 2593 }, { "epoch": 0.34706984211934705, "grad_norm": 22.484464645385742, "learning_rate": 6.529301578806529e-07, "loss": 0.5531, "step": 2594 }, { "epoch": 0.3472036392828472, "grad_norm": 48.29197311401367, "learning_rate": 6.527963607171527e-07, "loss": 0.7799, "step": 2595 }, { "epoch": 0.34733743644634735, "grad_norm": 12.906567573547363, "learning_rate": 6.526625635536527e-07, "loss": 0.3793, "step": 2596 }, { "epoch": 0.3474712336098475, "grad_norm": 14.492944717407227, "learning_rate": 6.525287663901525e-07, "loss": 0.5378, "step": 2597 }, { "epoch": 0.3476050307733476, "grad_norm": 21.507204055786133, "learning_rate": 6.523949692266524e-07, "loss": 0.4915, "step": 2598 }, { "epoch": 0.34773882793684774, "grad_norm": 11.376646041870117, "learning_rate": 6.522611720631522e-07, "loss": 0.4767, "step": 2599 }, { "epoch": 0.3478726251003479, "grad_norm": 11.886253356933594, "learning_rate": 6.521273748996521e-07, "loss": 0.3559, "step": 2600 }, { "epoch": 0.348006422263848, "grad_norm": 13.722391128540039, "learning_rate": 6.51993577736152e-07, "loss": 0.5135, "step": 2601 }, { "epoch": 0.34814021942734813, "grad_norm": 22.572994232177734, "learning_rate": 6.518597805726518e-07, "loss": 0.4762, "step": 2602 }, { "epoch": 0.3482740165908483, "grad_norm": 38.73652648925781, "learning_rate": 6.517259834091517e-07, "loss": 0.697, "step": 2603 }, { "epoch": 0.3484078137543484, "grad_norm": 44.919193267822266, "learning_rate": 6.515921862456516e-07, "loss": 0.6376, "step": 2604 }, { "epoch": 0.3485416109178485, "grad_norm": 11.577322959899902, "learning_rate": 6.514583890821515e-07, "loss": 0.5039, "step": 2605 }, { "epoch": 0.34867540808134867, "grad_norm": 12.800189018249512, "learning_rate": 6.513245919186513e-07, "loss": 0.4058, "step": 2606 }, { "epoch": 0.3488092052448488, "grad_norm": 11.90531063079834, "learning_rate": 6.511907947551511e-07, "loss": 0.5076, "step": 2607 }, { "epoch": 0.34894300240834897, "grad_norm": 18.67022132873535, "learning_rate": 6.51056997591651e-07, "loss": 0.4848, "step": 2608 }, { "epoch": 0.34907679957184906, "grad_norm": 20.28394889831543, "learning_rate": 6.509232004281509e-07, "loss": 0.4427, "step": 2609 }, { "epoch": 0.3492105967353492, "grad_norm": 25.767751693725586, "learning_rate": 6.507894032646508e-07, "loss": 0.4433, "step": 2610 }, { "epoch": 0.34934439389884936, "grad_norm": 30.025253295898438, "learning_rate": 6.506556061011506e-07, "loss": 0.3327, "step": 2611 }, { "epoch": 0.34947819106234945, "grad_norm": 30.1164493560791, "learning_rate": 6.505218089376505e-07, "loss": 0.4752, "step": 2612 }, { "epoch": 0.3496119882258496, "grad_norm": 13.06916332244873, "learning_rate": 6.503880117741504e-07, "loss": 0.3735, "step": 2613 }, { "epoch": 0.34974578538934975, "grad_norm": 9.624091148376465, "learning_rate": 6.502542146106502e-07, "loss": 0.4008, "step": 2614 }, { "epoch": 0.3498795825528499, "grad_norm": 43.892391204833984, "learning_rate": 6.5012041744715e-07, "loss": 0.4805, "step": 2615 }, { "epoch": 0.35001337971635, "grad_norm": 12.760629653930664, "learning_rate": 6.499866202836499e-07, "loss": 0.4068, "step": 2616 }, { "epoch": 0.35014717687985014, "grad_norm": 15.51252269744873, "learning_rate": 6.498528231201498e-07, "loss": 0.5382, "step": 2617 }, { "epoch": 0.3502809740433503, "grad_norm": 16.076465606689453, "learning_rate": 6.497190259566497e-07, "loss": 0.4974, "step": 2618 }, { "epoch": 0.35041477120685044, "grad_norm": 44.519161224365234, "learning_rate": 6.495852287931496e-07, "loss": 0.2953, "step": 2619 }, { "epoch": 0.35054856837035053, "grad_norm": 18.369691848754883, "learning_rate": 6.494514316296494e-07, "loss": 0.5477, "step": 2620 }, { "epoch": 0.3506823655338507, "grad_norm": 14.270296096801758, "learning_rate": 6.493176344661493e-07, "loss": 0.5067, "step": 2621 }, { "epoch": 0.35081616269735083, "grad_norm": 28.384641647338867, "learning_rate": 6.491838373026491e-07, "loss": 0.2929, "step": 2622 }, { "epoch": 0.3509499598608509, "grad_norm": 41.776458740234375, "learning_rate": 6.49050040139149e-07, "loss": 0.466, "step": 2623 }, { "epoch": 0.3510837570243511, "grad_norm": 38.9627685546875, "learning_rate": 6.489162429756489e-07, "loss": 0.7435, "step": 2624 }, { "epoch": 0.3512175541878512, "grad_norm": 9.646749496459961, "learning_rate": 6.487824458121488e-07, "loss": 0.2281, "step": 2625 }, { "epoch": 0.35135135135135137, "grad_norm": 31.460613250732422, "learning_rate": 6.486486486486486e-07, "loss": 0.4512, "step": 2626 }, { "epoch": 0.35148514851485146, "grad_norm": 18.793418884277344, "learning_rate": 6.485148514851485e-07, "loss": 0.3547, "step": 2627 }, { "epoch": 0.3516189456783516, "grad_norm": 35.73872756958008, "learning_rate": 6.483810543216484e-07, "loss": 0.6748, "step": 2628 }, { "epoch": 0.35175274284185176, "grad_norm": 9.270979881286621, "learning_rate": 6.482472571581483e-07, "loss": 0.17, "step": 2629 }, { "epoch": 0.3518865400053519, "grad_norm": 27.473915100097656, "learning_rate": 6.48113459994648e-07, "loss": 0.5878, "step": 2630 }, { "epoch": 0.352020337168852, "grad_norm": 20.047203063964844, "learning_rate": 6.479796628311479e-07, "loss": 0.4187, "step": 2631 }, { "epoch": 0.35215413433235215, "grad_norm": 38.27015686035156, "learning_rate": 6.478458656676478e-07, "loss": 0.6121, "step": 2632 }, { "epoch": 0.3522879314958523, "grad_norm": 33.37297439575195, "learning_rate": 6.477120685041478e-07, "loss": 0.7623, "step": 2633 }, { "epoch": 0.3524217286593524, "grad_norm": 22.72806167602539, "learning_rate": 6.475782713406475e-07, "loss": 0.3184, "step": 2634 }, { "epoch": 0.35255552582285254, "grad_norm": 19.929540634155273, "learning_rate": 6.474444741771474e-07, "loss": 0.5695, "step": 2635 }, { "epoch": 0.3526893229863527, "grad_norm": 21.45611572265625, "learning_rate": 6.473106770136473e-07, "loss": 0.34, "step": 2636 }, { "epoch": 0.35282312014985284, "grad_norm": 18.712635040283203, "learning_rate": 6.471768798501472e-07, "loss": 0.4085, "step": 2637 }, { "epoch": 0.35295691731335294, "grad_norm": 20.81340980529785, "learning_rate": 6.47043082686647e-07, "loss": 0.486, "step": 2638 }, { "epoch": 0.3530907144768531, "grad_norm": 23.663131713867188, "learning_rate": 6.469092855231468e-07, "loss": 0.6748, "step": 2639 }, { "epoch": 0.35322451164035323, "grad_norm": 14.73544979095459, "learning_rate": 6.467754883596468e-07, "loss": 0.4353, "step": 2640 }, { "epoch": 0.3533583088038534, "grad_norm": 17.683170318603516, "learning_rate": 6.466416911961466e-07, "loss": 0.5622, "step": 2641 }, { "epoch": 0.3534921059673535, "grad_norm": 20.597227096557617, "learning_rate": 6.465078940326465e-07, "loss": 0.5959, "step": 2642 }, { "epoch": 0.3536259031308536, "grad_norm": 16.454410552978516, "learning_rate": 6.463740968691463e-07, "loss": 0.5717, "step": 2643 }, { "epoch": 0.3537597002943538, "grad_norm": 17.92209243774414, "learning_rate": 6.462402997056462e-07, "loss": 0.4498, "step": 2644 }, { "epoch": 0.35389349745785387, "grad_norm": 20.942487716674805, "learning_rate": 6.46106502542146e-07, "loss": 0.2957, "step": 2645 }, { "epoch": 0.354027294621354, "grad_norm": 11.649075508117676, "learning_rate": 6.459727053786459e-07, "loss": 0.4172, "step": 2646 }, { "epoch": 0.35416109178485417, "grad_norm": 22.10218620300293, "learning_rate": 6.458389082151459e-07, "loss": 0.5566, "step": 2647 }, { "epoch": 0.3542948889483543, "grad_norm": 24.845516204833984, "learning_rate": 6.457051110516457e-07, "loss": 0.5933, "step": 2648 }, { "epoch": 0.3544286861118544, "grad_norm": 20.87465476989746, "learning_rate": 6.455713138881455e-07, "loss": 0.4062, "step": 2649 }, { "epoch": 0.35456248327535456, "grad_norm": 20.504047393798828, "learning_rate": 6.454375167246454e-07, "loss": 0.7131, "step": 2650 }, { "epoch": 0.3546962804388547, "grad_norm": 19.13882064819336, "learning_rate": 6.453037195611453e-07, "loss": 0.402, "step": 2651 }, { "epoch": 0.35483007760235485, "grad_norm": 15.571386337280273, "learning_rate": 6.451699223976452e-07, "loss": 0.4581, "step": 2652 }, { "epoch": 0.35496387476585495, "grad_norm": 16.624404907226562, "learning_rate": 6.450361252341449e-07, "loss": 0.2362, "step": 2653 }, { "epoch": 0.3550976719293551, "grad_norm": 23.587785720825195, "learning_rate": 6.449023280706448e-07, "loss": 0.5161, "step": 2654 }, { "epoch": 0.35523146909285525, "grad_norm": 21.2092227935791, "learning_rate": 6.447685309071448e-07, "loss": 0.3017, "step": 2655 }, { "epoch": 0.35536526625635534, "grad_norm": 20.9077205657959, "learning_rate": 6.446347337436447e-07, "loss": 0.4373, "step": 2656 }, { "epoch": 0.3554990634198555, "grad_norm": 20.579376220703125, "learning_rate": 6.445009365801444e-07, "loss": 0.4858, "step": 2657 }, { "epoch": 0.35563286058335564, "grad_norm": 9.320272445678711, "learning_rate": 6.443671394166443e-07, "loss": 0.4002, "step": 2658 }, { "epoch": 0.3557666577468558, "grad_norm": 11.308855056762695, "learning_rate": 6.442333422531442e-07, "loss": 0.4658, "step": 2659 }, { "epoch": 0.3559004549103559, "grad_norm": 23.598196029663086, "learning_rate": 6.440995450896441e-07, "loss": 0.4843, "step": 2660 }, { "epoch": 0.35603425207385603, "grad_norm": 35.88909912109375, "learning_rate": 6.439657479261439e-07, "loss": 0.5468, "step": 2661 }, { "epoch": 0.3561680492373562, "grad_norm": 27.633516311645508, "learning_rate": 6.438319507626438e-07, "loss": 0.5806, "step": 2662 }, { "epoch": 0.3563018464008563, "grad_norm": 32.24040222167969, "learning_rate": 6.436981535991437e-07, "loss": 0.5191, "step": 2663 }, { "epoch": 0.3564356435643564, "grad_norm": 17.724050521850586, "learning_rate": 6.435643564356436e-07, "loss": 0.3238, "step": 2664 }, { "epoch": 0.35656944072785657, "grad_norm": 24.41505241394043, "learning_rate": 6.434305592721434e-07, "loss": 0.4471, "step": 2665 }, { "epoch": 0.3567032378913567, "grad_norm": 21.887248992919922, "learning_rate": 6.432967621086433e-07, "loss": 0.4913, "step": 2666 }, { "epoch": 0.3568370350548568, "grad_norm": 9.997415542602539, "learning_rate": 6.431629649451431e-07, "loss": 0.4409, "step": 2667 }, { "epoch": 0.35697083221835696, "grad_norm": 34.94662857055664, "learning_rate": 6.430291677816429e-07, "loss": 0.3077, "step": 2668 }, { "epoch": 0.3571046293818571, "grad_norm": 14.16501235961914, "learning_rate": 6.428953706181429e-07, "loss": 0.2586, "step": 2669 }, { "epoch": 0.35723842654535726, "grad_norm": 20.645233154296875, "learning_rate": 6.427615734546428e-07, "loss": 0.2998, "step": 2670 }, { "epoch": 0.35737222370885735, "grad_norm": 22.954307556152344, "learning_rate": 6.426277762911427e-07, "loss": 0.7194, "step": 2671 }, { "epoch": 0.3575060208723575, "grad_norm": 9.805742263793945, "learning_rate": 6.424939791276424e-07, "loss": 0.5262, "step": 2672 }, { "epoch": 0.35763981803585765, "grad_norm": 12.345451354980469, "learning_rate": 6.423601819641423e-07, "loss": 0.3716, "step": 2673 }, { "epoch": 0.3577736151993578, "grad_norm": 17.96391487121582, "learning_rate": 6.422263848006422e-07, "loss": 0.3841, "step": 2674 }, { "epoch": 0.3579074123628579, "grad_norm": 17.421371459960938, "learning_rate": 6.420925876371421e-07, "loss": 0.4942, "step": 2675 }, { "epoch": 0.35804120952635804, "grad_norm": 52.47578430175781, "learning_rate": 6.419587904736418e-07, "loss": 0.8855, "step": 2676 }, { "epoch": 0.3581750066898582, "grad_norm": 16.329919815063477, "learning_rate": 6.418249933101418e-07, "loss": 0.3199, "step": 2677 }, { "epoch": 0.3583088038533583, "grad_norm": 38.92097473144531, "learning_rate": 6.416911961466417e-07, "loss": 0.2915, "step": 2678 }, { "epoch": 0.35844260101685843, "grad_norm": 16.22386360168457, "learning_rate": 6.415573989831416e-07, "loss": 0.4887, "step": 2679 }, { "epoch": 0.3585763981803586, "grad_norm": 45.306034088134766, "learning_rate": 6.414236018196414e-07, "loss": 0.5069, "step": 2680 }, { "epoch": 0.35871019534385873, "grad_norm": 25.896303176879883, "learning_rate": 6.412898046561412e-07, "loss": 0.5681, "step": 2681 }, { "epoch": 0.3588439925073588, "grad_norm": 23.942691802978516, "learning_rate": 6.411560074926411e-07, "loss": 0.3329, "step": 2682 }, { "epoch": 0.35897778967085897, "grad_norm": 28.82329750061035, "learning_rate": 6.41022210329141e-07, "loss": 0.456, "step": 2683 }, { "epoch": 0.3591115868343591, "grad_norm": 48.77520751953125, "learning_rate": 6.408884131656409e-07, "loss": 0.3357, "step": 2684 }, { "epoch": 0.35924538399785927, "grad_norm": 23.093339920043945, "learning_rate": 6.407546160021407e-07, "loss": 0.6395, "step": 2685 }, { "epoch": 0.35937918116135936, "grad_norm": 22.138385772705078, "learning_rate": 6.406208188386406e-07, "loss": 0.3723, "step": 2686 }, { "epoch": 0.3595129783248595, "grad_norm": 20.560720443725586, "learning_rate": 6.404870216751405e-07, "loss": 0.5508, "step": 2687 }, { "epoch": 0.35964677548835966, "grad_norm": 25.442167282104492, "learning_rate": 6.403532245116403e-07, "loss": 0.2904, "step": 2688 }, { "epoch": 0.35978057265185975, "grad_norm": 25.125669479370117, "learning_rate": 6.402194273481402e-07, "loss": 0.5491, "step": 2689 }, { "epoch": 0.3599143698153599, "grad_norm": 13.200005531311035, "learning_rate": 6.4008563018464e-07, "loss": 0.3787, "step": 2690 }, { "epoch": 0.36004816697886005, "grad_norm": 24.138212203979492, "learning_rate": 6.3995183302114e-07, "loss": 0.4939, "step": 2691 }, { "epoch": 0.3601819641423602, "grad_norm": 11.414193153381348, "learning_rate": 6.398180358576398e-07, "loss": 0.368, "step": 2692 }, { "epoch": 0.3603157613058603, "grad_norm": 18.814617156982422, "learning_rate": 6.396842386941397e-07, "loss": 0.6445, "step": 2693 }, { "epoch": 0.36044955846936044, "grad_norm": 24.2281494140625, "learning_rate": 6.395504415306396e-07, "loss": 0.3645, "step": 2694 }, { "epoch": 0.3605833556328606, "grad_norm": 12.628636360168457, "learning_rate": 6.394166443671393e-07, "loss": 0.5638, "step": 2695 }, { "epoch": 0.36071715279636074, "grad_norm": 16.387794494628906, "learning_rate": 6.392828472036392e-07, "loss": 0.7066, "step": 2696 }, { "epoch": 0.36085094995986083, "grad_norm": 28.188814163208008, "learning_rate": 6.391490500401391e-07, "loss": 0.2781, "step": 2697 }, { "epoch": 0.360984747123361, "grad_norm": 17.986421585083008, "learning_rate": 6.39015252876639e-07, "loss": 0.3682, "step": 2698 }, { "epoch": 0.36111854428686113, "grad_norm": 26.192975997924805, "learning_rate": 6.388814557131388e-07, "loss": 0.4132, "step": 2699 }, { "epoch": 0.3612523414503612, "grad_norm": 21.354320526123047, "learning_rate": 6.387476585496387e-07, "loss": 0.2598, "step": 2700 }, { "epoch": 0.3613861386138614, "grad_norm": 13.402697563171387, "learning_rate": 6.386138613861386e-07, "loss": 0.5309, "step": 2701 }, { "epoch": 0.3615199357773615, "grad_norm": 11.97231388092041, "learning_rate": 6.384800642226385e-07, "loss": 0.4575, "step": 2702 }, { "epoch": 0.36165373294086167, "grad_norm": 20.570375442504883, "learning_rate": 6.383462670591383e-07, "loss": 0.4511, "step": 2703 }, { "epoch": 0.36178753010436177, "grad_norm": 18.80512237548828, "learning_rate": 6.382124698956381e-07, "loss": 0.478, "step": 2704 }, { "epoch": 0.3619213272678619, "grad_norm": 26.73650550842285, "learning_rate": 6.38078672732138e-07, "loss": 0.3465, "step": 2705 }, { "epoch": 0.36205512443136206, "grad_norm": 10.293181419372559, "learning_rate": 6.37944875568638e-07, "loss": 0.2306, "step": 2706 }, { "epoch": 0.3621889215948622, "grad_norm": 32.66450500488281, "learning_rate": 6.378110784051378e-07, "loss": 0.5067, "step": 2707 }, { "epoch": 0.3623227187583623, "grad_norm": 40.90478515625, "learning_rate": 6.376772812416377e-07, "loss": 0.6753, "step": 2708 }, { "epoch": 0.36245651592186245, "grad_norm": 13.47771167755127, "learning_rate": 6.375434840781375e-07, "loss": 0.467, "step": 2709 }, { "epoch": 0.3625903130853626, "grad_norm": 14.885379791259766, "learning_rate": 6.374096869146374e-07, "loss": 0.4714, "step": 2710 }, { "epoch": 0.3627241102488627, "grad_norm": 17.03086280822754, "learning_rate": 6.372758897511372e-07, "loss": 0.419, "step": 2711 }, { "epoch": 0.36285790741236285, "grad_norm": 17.674564361572266, "learning_rate": 6.371420925876371e-07, "loss": 0.3164, "step": 2712 }, { "epoch": 0.362991704575863, "grad_norm": 27.890230178833008, "learning_rate": 6.37008295424137e-07, "loss": 0.403, "step": 2713 }, { "epoch": 0.36312550173936314, "grad_norm": 31.39132308959961, "learning_rate": 6.368744982606369e-07, "loss": 0.427, "step": 2714 }, { "epoch": 0.36325929890286324, "grad_norm": 23.48096466064453, "learning_rate": 6.367407010971367e-07, "loss": 0.3457, "step": 2715 }, { "epoch": 0.3633930960663634, "grad_norm": 34.77225112915039, "learning_rate": 6.366069039336366e-07, "loss": 0.5115, "step": 2716 }, { "epoch": 0.36352689322986353, "grad_norm": 16.94211769104004, "learning_rate": 6.364731067701365e-07, "loss": 0.391, "step": 2717 }, { "epoch": 0.3636606903933637, "grad_norm": 23.067020416259766, "learning_rate": 6.363393096066362e-07, "loss": 0.2307, "step": 2718 }, { "epoch": 0.3637944875568638, "grad_norm": 35.36826705932617, "learning_rate": 6.362055124431361e-07, "loss": 0.4477, "step": 2719 }, { "epoch": 0.3639282847203639, "grad_norm": 17.029403686523438, "learning_rate": 6.36071715279636e-07, "loss": 0.3068, "step": 2720 }, { "epoch": 0.3640620818838641, "grad_norm": 50.49969482421875, "learning_rate": 6.35937918116136e-07, "loss": 0.7767, "step": 2721 }, { "epoch": 0.36419587904736417, "grad_norm": 17.12599754333496, "learning_rate": 6.358041209526358e-07, "loss": 0.4827, "step": 2722 }, { "epoch": 0.3643296762108643, "grad_norm": 27.42835807800293, "learning_rate": 6.356703237891356e-07, "loss": 0.4273, "step": 2723 }, { "epoch": 0.36446347337436447, "grad_norm": 12.473685264587402, "learning_rate": 6.355365266256355e-07, "loss": 0.3581, "step": 2724 }, { "epoch": 0.3645972705378646, "grad_norm": 23.5173282623291, "learning_rate": 6.354027294621354e-07, "loss": 0.478, "step": 2725 }, { "epoch": 0.3647310677013647, "grad_norm": 39.53376388549805, "learning_rate": 6.352689322986352e-07, "loss": 0.4214, "step": 2726 }, { "epoch": 0.36486486486486486, "grad_norm": 37.01898956298828, "learning_rate": 6.35135135135135e-07, "loss": 0.2747, "step": 2727 }, { "epoch": 0.364998662028365, "grad_norm": 15.565531730651855, "learning_rate": 6.35001337971635e-07, "loss": 0.2503, "step": 2728 }, { "epoch": 0.36513245919186516, "grad_norm": 36.013275146484375, "learning_rate": 6.348675408081349e-07, "loss": 0.4569, "step": 2729 }, { "epoch": 0.36526625635536525, "grad_norm": 25.51862144470215, "learning_rate": 6.347337436446347e-07, "loss": 0.544, "step": 2730 }, { "epoch": 0.3654000535188654, "grad_norm": 42.768310546875, "learning_rate": 6.345999464811346e-07, "loss": 0.6353, "step": 2731 }, { "epoch": 0.36553385068236555, "grad_norm": 42.40343475341797, "learning_rate": 6.344661493176344e-07, "loss": 0.6544, "step": 2732 }, { "epoch": 0.3656676478458657, "grad_norm": 17.920948028564453, "learning_rate": 6.343323521541343e-07, "loss": 0.3676, "step": 2733 }, { "epoch": 0.3658014450093658, "grad_norm": 17.681612014770508, "learning_rate": 6.341985549906341e-07, "loss": 0.3173, "step": 2734 }, { "epoch": 0.36593524217286594, "grad_norm": 20.365583419799805, "learning_rate": 6.340647578271341e-07, "loss": 0.5808, "step": 2735 }, { "epoch": 0.3660690393363661, "grad_norm": 27.61241340637207, "learning_rate": 6.33930960663634e-07, "loss": 0.5045, "step": 2736 }, { "epoch": 0.3662028364998662, "grad_norm": 16.888240814208984, "learning_rate": 6.337971635001338e-07, "loss": 0.4553, "step": 2737 }, { "epoch": 0.36633663366336633, "grad_norm": 20.936532974243164, "learning_rate": 6.336633663366336e-07, "loss": 0.3971, "step": 2738 }, { "epoch": 0.3664704308268665, "grad_norm": 16.55514907836914, "learning_rate": 6.335295691731335e-07, "loss": 0.4097, "step": 2739 }, { "epoch": 0.3666042279903666, "grad_norm": 17.56923484802246, "learning_rate": 6.333957720096334e-07, "loss": 0.4471, "step": 2740 }, { "epoch": 0.3667380251538667, "grad_norm": 20.065027236938477, "learning_rate": 6.332619748461332e-07, "loss": 0.3744, "step": 2741 }, { "epoch": 0.36687182231736687, "grad_norm": 17.284454345703125, "learning_rate": 6.33128177682633e-07, "loss": 0.5024, "step": 2742 }, { "epoch": 0.367005619480867, "grad_norm": 25.426700592041016, "learning_rate": 6.32994380519133e-07, "loss": 0.4943, "step": 2743 }, { "epoch": 0.36713941664436717, "grad_norm": 38.731014251708984, "learning_rate": 6.328605833556329e-07, "loss": 0.7664, "step": 2744 }, { "epoch": 0.36727321380786726, "grad_norm": 24.87837028503418, "learning_rate": 6.327267861921327e-07, "loss": 0.5002, "step": 2745 }, { "epoch": 0.3674070109713674, "grad_norm": 11.269930839538574, "learning_rate": 6.325929890286325e-07, "loss": 0.3832, "step": 2746 }, { "epoch": 0.36754080813486756, "grad_norm": 14.632765769958496, "learning_rate": 6.324591918651324e-07, "loss": 0.3245, "step": 2747 }, { "epoch": 0.36767460529836765, "grad_norm": 14.732487678527832, "learning_rate": 6.323253947016323e-07, "loss": 0.3846, "step": 2748 }, { "epoch": 0.3678084024618678, "grad_norm": 16.879737854003906, "learning_rate": 6.321915975381321e-07, "loss": 0.4455, "step": 2749 }, { "epoch": 0.36794219962536795, "grad_norm": 13.176748275756836, "learning_rate": 6.320578003746321e-07, "loss": 0.4798, "step": 2750 }, { "epoch": 0.3680759967888681, "grad_norm": 25.155397415161133, "learning_rate": 6.319240032111319e-07, "loss": 0.4768, "step": 2751 }, { "epoch": 0.3682097939523682, "grad_norm": 16.44510841369629, "learning_rate": 6.317902060476318e-07, "loss": 0.4084, "step": 2752 }, { "epoch": 0.36834359111586834, "grad_norm": 15.343328475952148, "learning_rate": 6.316564088841316e-07, "loss": 0.5006, "step": 2753 }, { "epoch": 0.3684773882793685, "grad_norm": 21.012611389160156, "learning_rate": 6.315226117206315e-07, "loss": 0.5262, "step": 2754 }, { "epoch": 0.36861118544286864, "grad_norm": 12.121697425842285, "learning_rate": 6.313888145571313e-07, "loss": 0.3799, "step": 2755 }, { "epoch": 0.36874498260636873, "grad_norm": 16.376001358032227, "learning_rate": 6.312550173936312e-07, "loss": 0.3916, "step": 2756 }, { "epoch": 0.3688787797698689, "grad_norm": 19.103839874267578, "learning_rate": 6.311212202301311e-07, "loss": 0.4858, "step": 2757 }, { "epoch": 0.36901257693336903, "grad_norm": 21.438522338867188, "learning_rate": 6.30987423066631e-07, "loss": 0.3258, "step": 2758 }, { "epoch": 0.3691463740968691, "grad_norm": 31.311235427856445, "learning_rate": 6.308536259031309e-07, "loss": 0.5855, "step": 2759 }, { "epoch": 0.36928017126036927, "grad_norm": 15.92721176147461, "learning_rate": 6.307198287396307e-07, "loss": 0.3973, "step": 2760 }, { "epoch": 0.3694139684238694, "grad_norm": 12.54722785949707, "learning_rate": 6.305860315761305e-07, "loss": 0.3445, "step": 2761 }, { "epoch": 0.36954776558736957, "grad_norm": 13.141907691955566, "learning_rate": 6.304522344126304e-07, "loss": 0.4654, "step": 2762 }, { "epoch": 0.36968156275086966, "grad_norm": 15.78504753112793, "learning_rate": 6.303184372491303e-07, "loss": 0.4218, "step": 2763 }, { "epoch": 0.3698153599143698, "grad_norm": 47.69889831542969, "learning_rate": 6.301846400856302e-07, "loss": 0.5456, "step": 2764 }, { "epoch": 0.36994915707786996, "grad_norm": 11.313148498535156, "learning_rate": 6.3005084292213e-07, "loss": 0.2222, "step": 2765 }, { "epoch": 0.3700829542413701, "grad_norm": 22.671520233154297, "learning_rate": 6.299170457586299e-07, "loss": 0.4297, "step": 2766 }, { "epoch": 0.3702167514048702, "grad_norm": 18.39501190185547, "learning_rate": 6.297832485951298e-07, "loss": 0.3892, "step": 2767 }, { "epoch": 0.37035054856837035, "grad_norm": 10.713227272033691, "learning_rate": 6.296494514316297e-07, "loss": 0.386, "step": 2768 }, { "epoch": 0.3704843457318705, "grad_norm": 17.114173889160156, "learning_rate": 6.295156542681294e-07, "loss": 0.3008, "step": 2769 }, { "epoch": 0.3706181428953706, "grad_norm": 11.033141136169434, "learning_rate": 6.293818571046293e-07, "loss": 0.4181, "step": 2770 }, { "epoch": 0.37075194005887074, "grad_norm": 28.37913703918457, "learning_rate": 6.292480599411292e-07, "loss": 0.5381, "step": 2771 }, { "epoch": 0.3708857372223709, "grad_norm": 23.73164939880371, "learning_rate": 6.291142627776292e-07, "loss": 0.255, "step": 2772 }, { "epoch": 0.37101953438587104, "grad_norm": 22.1763858795166, "learning_rate": 6.28980465614129e-07, "loss": 0.5199, "step": 2773 }, { "epoch": 0.37115333154937113, "grad_norm": 15.655139923095703, "learning_rate": 6.288466684506288e-07, "loss": 0.4055, "step": 2774 }, { "epoch": 0.3712871287128713, "grad_norm": 17.125232696533203, "learning_rate": 6.287128712871287e-07, "loss": 0.4048, "step": 2775 }, { "epoch": 0.37142092587637143, "grad_norm": 16.156639099121094, "learning_rate": 6.285790741236285e-07, "loss": 0.5058, "step": 2776 }, { "epoch": 0.3715547230398716, "grad_norm": 41.900447845458984, "learning_rate": 6.284452769601284e-07, "loss": 0.6972, "step": 2777 }, { "epoch": 0.3716885202033717, "grad_norm": 11.838696479797363, "learning_rate": 6.283114797966283e-07, "loss": 0.3884, "step": 2778 }, { "epoch": 0.3718223173668718, "grad_norm": 8.151738166809082, "learning_rate": 6.281776826331282e-07, "loss": 0.2792, "step": 2779 }, { "epoch": 0.371956114530372, "grad_norm": 18.36866569519043, "learning_rate": 6.28043885469628e-07, "loss": 0.4717, "step": 2780 }, { "epoch": 0.37208991169387207, "grad_norm": 15.869414329528809, "learning_rate": 6.279100883061279e-07, "loss": 0.2885, "step": 2781 }, { "epoch": 0.3722237088573722, "grad_norm": 17.53122901916504, "learning_rate": 6.277762911426278e-07, "loss": 0.3451, "step": 2782 }, { "epoch": 0.37235750602087236, "grad_norm": 21.45560646057129, "learning_rate": 6.276424939791276e-07, "loss": 0.5167, "step": 2783 }, { "epoch": 0.3724913031843725, "grad_norm": 15.209175109863281, "learning_rate": 6.275086968156274e-07, "loss": 0.3922, "step": 2784 }, { "epoch": 0.3726251003478726, "grad_norm": 13.778229713439941, "learning_rate": 6.273748996521273e-07, "loss": 0.3589, "step": 2785 }, { "epoch": 0.37275889751137276, "grad_norm": 12.71464729309082, "learning_rate": 6.272411024886272e-07, "loss": 0.2898, "step": 2786 }, { "epoch": 0.3728926946748729, "grad_norm": 19.121789932250977, "learning_rate": 6.271073053251272e-07, "loss": 0.4213, "step": 2787 }, { "epoch": 0.37302649183837305, "grad_norm": 15.830843925476074, "learning_rate": 6.269735081616269e-07, "loss": 0.6781, "step": 2788 }, { "epoch": 0.37316028900187315, "grad_norm": 26.435184478759766, "learning_rate": 6.268397109981268e-07, "loss": 0.4478, "step": 2789 }, { "epoch": 0.3732940861653733, "grad_norm": 29.02290153503418, "learning_rate": 6.267059138346267e-07, "loss": 0.6371, "step": 2790 }, { "epoch": 0.37342788332887344, "grad_norm": 22.75916862487793, "learning_rate": 6.265721166711266e-07, "loss": 0.5217, "step": 2791 }, { "epoch": 0.37356168049237354, "grad_norm": 12.764261245727539, "learning_rate": 6.264383195076263e-07, "loss": 0.3816, "step": 2792 }, { "epoch": 0.3736954776558737, "grad_norm": 21.527854919433594, "learning_rate": 6.263045223441262e-07, "loss": 0.4978, "step": 2793 }, { "epoch": 0.37382927481937384, "grad_norm": 25.421560287475586, "learning_rate": 6.261707251806262e-07, "loss": 0.5037, "step": 2794 }, { "epoch": 0.373963071982874, "grad_norm": 12.428546905517578, "learning_rate": 6.260369280171261e-07, "loss": 0.3667, "step": 2795 }, { "epoch": 0.3740968691463741, "grad_norm": 15.07792854309082, "learning_rate": 6.259031308536259e-07, "loss": 0.3949, "step": 2796 }, { "epoch": 0.3742306663098742, "grad_norm": 15.060802459716797, "learning_rate": 6.257693336901257e-07, "loss": 0.4983, "step": 2797 }, { "epoch": 0.3743644634733744, "grad_norm": 23.956384658813477, "learning_rate": 6.256355365266256e-07, "loss": 0.442, "step": 2798 }, { "epoch": 0.3744982606368745, "grad_norm": 14.829912185668945, "learning_rate": 6.255017393631254e-07, "loss": 0.4367, "step": 2799 }, { "epoch": 0.3746320578003746, "grad_norm": 16.898496627807617, "learning_rate": 6.253679421996253e-07, "loss": 0.4046, "step": 2800 }, { "epoch": 0.37476585496387477, "grad_norm": 11.349881172180176, "learning_rate": 6.252341450361253e-07, "loss": 0.3169, "step": 2801 }, { "epoch": 0.3748996521273749, "grad_norm": 13.748571395874023, "learning_rate": 6.251003478726251e-07, "loss": 0.4902, "step": 2802 }, { "epoch": 0.375033449290875, "grad_norm": 28.690990447998047, "learning_rate": 6.249665507091249e-07, "loss": 0.4597, "step": 2803 }, { "epoch": 0.37516724645437516, "grad_norm": 21.93988609313965, "learning_rate": 6.248327535456248e-07, "loss": 0.4597, "step": 2804 }, { "epoch": 0.3753010436178753, "grad_norm": 44.86304473876953, "learning_rate": 6.246989563821247e-07, "loss": 0.4966, "step": 2805 }, { "epoch": 0.37543484078137546, "grad_norm": 19.433385848999023, "learning_rate": 6.245651592186246e-07, "loss": 0.5091, "step": 2806 }, { "epoch": 0.37556863794487555, "grad_norm": 29.948211669921875, "learning_rate": 6.244313620551243e-07, "loss": 0.492, "step": 2807 }, { "epoch": 0.3757024351083757, "grad_norm": 15.415376663208008, "learning_rate": 6.242975648916242e-07, "loss": 0.4283, "step": 2808 }, { "epoch": 0.37583623227187585, "grad_norm": 13.159832000732422, "learning_rate": 6.241637677281242e-07, "loss": 0.4077, "step": 2809 }, { "epoch": 0.375970029435376, "grad_norm": 32.30062484741211, "learning_rate": 6.240299705646241e-07, "loss": 0.6422, "step": 2810 }, { "epoch": 0.3761038265988761, "grad_norm": 36.37200927734375, "learning_rate": 6.238961734011238e-07, "loss": 0.7964, "step": 2811 }, { "epoch": 0.37623762376237624, "grad_norm": 28.12065887451172, "learning_rate": 6.237623762376237e-07, "loss": 0.6329, "step": 2812 }, { "epoch": 0.3763714209258764, "grad_norm": 21.1434383392334, "learning_rate": 6.236285790741236e-07, "loss": 0.4171, "step": 2813 }, { "epoch": 0.3765052180893765, "grad_norm": 16.07000732421875, "learning_rate": 6.234947819106235e-07, "loss": 0.3984, "step": 2814 }, { "epoch": 0.37663901525287663, "grad_norm": 32.060760498046875, "learning_rate": 6.233609847471233e-07, "loss": 0.6674, "step": 2815 }, { "epoch": 0.3767728124163768, "grad_norm": 20.795835494995117, "learning_rate": 6.232271875836232e-07, "loss": 0.4919, "step": 2816 }, { "epoch": 0.37690660957987693, "grad_norm": 29.664756774902344, "learning_rate": 6.230933904201231e-07, "loss": 0.3629, "step": 2817 }, { "epoch": 0.377040406743377, "grad_norm": 13.241495132446289, "learning_rate": 6.22959593256623e-07, "loss": 0.3861, "step": 2818 }, { "epoch": 0.37717420390687717, "grad_norm": 12.99200439453125, "learning_rate": 6.228257960931228e-07, "loss": 0.4676, "step": 2819 }, { "epoch": 0.3773080010703773, "grad_norm": 12.416762351989746, "learning_rate": 6.226919989296227e-07, "loss": 0.4774, "step": 2820 }, { "epoch": 0.37744179823387747, "grad_norm": 12.224199295043945, "learning_rate": 6.225582017661225e-07, "loss": 0.4195, "step": 2821 }, { "epoch": 0.37757559539737756, "grad_norm": 60.9550895690918, "learning_rate": 6.224244046026223e-07, "loss": 0.6602, "step": 2822 }, { "epoch": 0.3777093925608777, "grad_norm": 20.460811614990234, "learning_rate": 6.222906074391223e-07, "loss": 0.3559, "step": 2823 }, { "epoch": 0.37784318972437786, "grad_norm": 19.924724578857422, "learning_rate": 6.221568102756222e-07, "loss": 0.3538, "step": 2824 }, { "epoch": 0.37797698688787795, "grad_norm": 11.000890731811523, "learning_rate": 6.22023013112122e-07, "loss": 0.3129, "step": 2825 }, { "epoch": 0.3781107840513781, "grad_norm": 10.3623046875, "learning_rate": 6.218892159486218e-07, "loss": 0.3675, "step": 2826 }, { "epoch": 0.37824458121487825, "grad_norm": 22.451169967651367, "learning_rate": 6.217554187851217e-07, "loss": 0.5549, "step": 2827 }, { "epoch": 0.3783783783783784, "grad_norm": 20.01016616821289, "learning_rate": 6.216216216216216e-07, "loss": 0.4271, "step": 2828 }, { "epoch": 0.3785121755418785, "grad_norm": 38.446659088134766, "learning_rate": 6.214878244581215e-07, "loss": 0.6022, "step": 2829 }, { "epoch": 0.37864597270537864, "grad_norm": 14.814497947692871, "learning_rate": 6.213540272946212e-07, "loss": 0.4044, "step": 2830 }, { "epoch": 0.3787797698688788, "grad_norm": 10.01783275604248, "learning_rate": 6.212202301311212e-07, "loss": 0.3536, "step": 2831 }, { "epoch": 0.37891356703237894, "grad_norm": 27.160165786743164, "learning_rate": 6.210864329676211e-07, "loss": 0.4089, "step": 2832 }, { "epoch": 0.37904736419587903, "grad_norm": 7.1077399253845215, "learning_rate": 6.20952635804121e-07, "loss": 0.3792, "step": 2833 }, { "epoch": 0.3791811613593792, "grad_norm": 16.126585006713867, "learning_rate": 6.208188386406207e-07, "loss": 0.3709, "step": 2834 }, { "epoch": 0.37931495852287933, "grad_norm": 9.44439697265625, "learning_rate": 6.206850414771206e-07, "loss": 0.2534, "step": 2835 }, { "epoch": 0.3794487556863794, "grad_norm": 16.871994018554688, "learning_rate": 6.205512443136205e-07, "loss": 0.455, "step": 2836 }, { "epoch": 0.3795825528498796, "grad_norm": 17.378719329833984, "learning_rate": 6.204174471501204e-07, "loss": 0.6368, "step": 2837 }, { "epoch": 0.3797163500133797, "grad_norm": 27.583776473999023, "learning_rate": 6.202836499866203e-07, "loss": 0.4882, "step": 2838 }, { "epoch": 0.37985014717687987, "grad_norm": 24.936336517333984, "learning_rate": 6.201498528231201e-07, "loss": 0.3152, "step": 2839 }, { "epoch": 0.37998394434037996, "grad_norm": 29.5897159576416, "learning_rate": 6.2001605565962e-07, "loss": 0.4155, "step": 2840 }, { "epoch": 0.3801177415038801, "grad_norm": 12.387731552124023, "learning_rate": 6.198822584961199e-07, "loss": 0.3482, "step": 2841 }, { "epoch": 0.38025153866738026, "grad_norm": 26.065370559692383, "learning_rate": 6.197484613326197e-07, "loss": 0.4712, "step": 2842 }, { "epoch": 0.3803853358308804, "grad_norm": 11.96097469329834, "learning_rate": 6.196146641691196e-07, "loss": 0.3776, "step": 2843 }, { "epoch": 0.3805191329943805, "grad_norm": 13.700599670410156, "learning_rate": 6.194808670056194e-07, "loss": 0.2253, "step": 2844 }, { "epoch": 0.38065293015788065, "grad_norm": 23.083343505859375, "learning_rate": 6.193470698421194e-07, "loss": 0.3539, "step": 2845 }, { "epoch": 0.3807867273213808, "grad_norm": 60.10033416748047, "learning_rate": 6.192132726786192e-07, "loss": 0.7998, "step": 2846 }, { "epoch": 0.3809205244848809, "grad_norm": 33.61488723754883, "learning_rate": 6.190794755151191e-07, "loss": 0.4604, "step": 2847 }, { "epoch": 0.38105432164838104, "grad_norm": 39.87495803833008, "learning_rate": 6.18945678351619e-07, "loss": 0.2484, "step": 2848 }, { "epoch": 0.3811881188118812, "grad_norm": 16.88115882873535, "learning_rate": 6.188118811881187e-07, "loss": 0.5847, "step": 2849 }, { "epoch": 0.38132191597538134, "grad_norm": 22.76394271850586, "learning_rate": 6.186780840246186e-07, "loss": 0.3847, "step": 2850 }, { "epoch": 0.38145571313888144, "grad_norm": 36.333526611328125, "learning_rate": 6.185442868611185e-07, "loss": 0.4949, "step": 2851 }, { "epoch": 0.3815895103023816, "grad_norm": 21.85832977294922, "learning_rate": 6.184104896976184e-07, "loss": 0.4345, "step": 2852 }, { "epoch": 0.38172330746588173, "grad_norm": 15.886068344116211, "learning_rate": 6.182766925341182e-07, "loss": 0.4189, "step": 2853 }, { "epoch": 0.3818571046293819, "grad_norm": 34.45877456665039, "learning_rate": 6.181428953706181e-07, "loss": 0.7794, "step": 2854 }, { "epoch": 0.381990901792882, "grad_norm": 57.4830207824707, "learning_rate": 6.18009098207118e-07, "loss": 0.7219, "step": 2855 }, { "epoch": 0.3821246989563821, "grad_norm": 16.039546966552734, "learning_rate": 6.178753010436179e-07, "loss": 0.293, "step": 2856 }, { "epoch": 0.3822584961198823, "grad_norm": 21.90696144104004, "learning_rate": 6.177415038801177e-07, "loss": 0.2627, "step": 2857 }, { "epoch": 0.38239229328338237, "grad_norm": 23.52309799194336, "learning_rate": 6.176077067166175e-07, "loss": 0.3419, "step": 2858 }, { "epoch": 0.3825260904468825, "grad_norm": 12.992916107177734, "learning_rate": 6.174739095531174e-07, "loss": 0.3952, "step": 2859 }, { "epoch": 0.38265988761038267, "grad_norm": 15.164949417114258, "learning_rate": 6.173401123896174e-07, "loss": 0.4673, "step": 2860 }, { "epoch": 0.3827936847738828, "grad_norm": 16.419382095336914, "learning_rate": 6.172063152261172e-07, "loss": 0.2715, "step": 2861 }, { "epoch": 0.3829274819373829, "grad_norm": 21.950267791748047, "learning_rate": 6.17072518062617e-07, "loss": 0.4334, "step": 2862 }, { "epoch": 0.38306127910088306, "grad_norm": 23.6065673828125, "learning_rate": 6.169387208991169e-07, "loss": 0.4745, "step": 2863 }, { "epoch": 0.3831950762643832, "grad_norm": 28.08837890625, "learning_rate": 6.168049237356168e-07, "loss": 0.5294, "step": 2864 }, { "epoch": 0.38332887342788335, "grad_norm": 11.335433006286621, "learning_rate": 6.166711265721166e-07, "loss": 0.296, "step": 2865 }, { "epoch": 0.38346267059138345, "grad_norm": 18.219894409179688, "learning_rate": 6.165373294086165e-07, "loss": 0.3911, "step": 2866 }, { "epoch": 0.3835964677548836, "grad_norm": 12.700651168823242, "learning_rate": 6.164035322451164e-07, "loss": 0.4345, "step": 2867 }, { "epoch": 0.38373026491838375, "grad_norm": 28.09602165222168, "learning_rate": 6.162697350816163e-07, "loss": 0.4358, "step": 2868 }, { "epoch": 0.38386406208188384, "grad_norm": 25.73479461669922, "learning_rate": 6.161359379181161e-07, "loss": 0.4981, "step": 2869 }, { "epoch": 0.383997859245384, "grad_norm": 24.68000602722168, "learning_rate": 6.16002140754616e-07, "loss": 0.54, "step": 2870 }, { "epoch": 0.38413165640888414, "grad_norm": 14.816701889038086, "learning_rate": 6.158683435911159e-07, "loss": 0.4474, "step": 2871 }, { "epoch": 0.3842654535723843, "grad_norm": 19.515628814697266, "learning_rate": 6.157345464276157e-07, "loss": 0.4308, "step": 2872 }, { "epoch": 0.3843992507358844, "grad_norm": 21.24599266052246, "learning_rate": 6.156007492641155e-07, "loss": 0.3067, "step": 2873 }, { "epoch": 0.38453304789938453, "grad_norm": 22.40419578552246, "learning_rate": 6.154669521006154e-07, "loss": 0.2771, "step": 2874 }, { "epoch": 0.3846668450628847, "grad_norm": 30.899120330810547, "learning_rate": 6.153331549371154e-07, "loss": 0.2943, "step": 2875 }, { "epoch": 0.3848006422263848, "grad_norm": 22.075841903686523, "learning_rate": 6.151993577736153e-07, "loss": 0.4899, "step": 2876 }, { "epoch": 0.3849344393898849, "grad_norm": 17.51565170288086, "learning_rate": 6.15065560610115e-07, "loss": 0.48, "step": 2877 }, { "epoch": 0.38506823655338507, "grad_norm": 17.40781593322754, "learning_rate": 6.149317634466149e-07, "loss": 0.3819, "step": 2878 }, { "epoch": 0.3852020337168852, "grad_norm": 12.68521499633789, "learning_rate": 6.147979662831148e-07, "loss": 0.3262, "step": 2879 }, { "epoch": 0.3853358308803853, "grad_norm": 14.970718383789062, "learning_rate": 6.146641691196146e-07, "loss": 0.338, "step": 2880 }, { "epoch": 0.38546962804388546, "grad_norm": 56.12607955932617, "learning_rate": 6.145303719561144e-07, "loss": 1.0432, "step": 2881 }, { "epoch": 0.3856034252073856, "grad_norm": 12.140799522399902, "learning_rate": 6.143965747926144e-07, "loss": 0.3781, "step": 2882 }, { "epoch": 0.38573722237088576, "grad_norm": 37.93321228027344, "learning_rate": 6.142627776291143e-07, "loss": 0.6956, "step": 2883 }, { "epoch": 0.38587101953438585, "grad_norm": 20.66442108154297, "learning_rate": 6.141289804656141e-07, "loss": 0.5118, "step": 2884 }, { "epoch": 0.386004816697886, "grad_norm": 23.51166343688965, "learning_rate": 6.13995183302114e-07, "loss": 0.5984, "step": 2885 }, { "epoch": 0.38613861386138615, "grad_norm": 26.504844665527344, "learning_rate": 6.138613861386138e-07, "loss": 0.4391, "step": 2886 }, { "epoch": 0.3862724110248863, "grad_norm": 16.849559783935547, "learning_rate": 6.137275889751137e-07, "loss": 0.3658, "step": 2887 }, { "epoch": 0.3864062081883864, "grad_norm": 12.198098182678223, "learning_rate": 6.135937918116135e-07, "loss": 0.3804, "step": 2888 }, { "epoch": 0.38654000535188654, "grad_norm": 12.761486053466797, "learning_rate": 6.134599946481134e-07, "loss": 0.3814, "step": 2889 }, { "epoch": 0.3866738025153867, "grad_norm": 15.462820053100586, "learning_rate": 6.133261974846134e-07, "loss": 0.5057, "step": 2890 }, { "epoch": 0.3868075996788868, "grad_norm": 14.355822563171387, "learning_rate": 6.131924003211132e-07, "loss": 0.4662, "step": 2891 }, { "epoch": 0.38694139684238693, "grad_norm": 16.929672241210938, "learning_rate": 6.13058603157613e-07, "loss": 0.3526, "step": 2892 }, { "epoch": 0.3870751940058871, "grad_norm": 10.933850288391113, "learning_rate": 6.129248059941129e-07, "loss": 0.4755, "step": 2893 }, { "epoch": 0.38720899116938723, "grad_norm": 20.423410415649414, "learning_rate": 6.127910088306128e-07, "loss": 0.378, "step": 2894 }, { "epoch": 0.3873427883328873, "grad_norm": 28.696462631225586, "learning_rate": 6.126572116671126e-07, "loss": 0.3822, "step": 2895 }, { "epoch": 0.38747658549638747, "grad_norm": 20.284151077270508, "learning_rate": 6.125234145036124e-07, "loss": 0.5799, "step": 2896 }, { "epoch": 0.3876103826598876, "grad_norm": 21.86400604248047, "learning_rate": 6.123896173401124e-07, "loss": 0.6843, "step": 2897 }, { "epoch": 0.38774417982338777, "grad_norm": 17.084810256958008, "learning_rate": 6.122558201766123e-07, "loss": 0.5725, "step": 2898 }, { "epoch": 0.38787797698688786, "grad_norm": 23.02536392211914, "learning_rate": 6.121220230131122e-07, "loss": 0.4303, "step": 2899 }, { "epoch": 0.388011774150388, "grad_norm": 16.55611801147461, "learning_rate": 6.119882258496119e-07, "loss": 0.5604, "step": 2900 }, { "epoch": 0.38814557131388816, "grad_norm": 29.842409133911133, "learning_rate": 6.118544286861118e-07, "loss": 0.2032, "step": 2901 }, { "epoch": 0.38827936847738825, "grad_norm": 14.985664367675781, "learning_rate": 6.117206315226117e-07, "loss": 0.386, "step": 2902 }, { "epoch": 0.3884131656408884, "grad_norm": 24.698766708374023, "learning_rate": 6.115868343591115e-07, "loss": 0.5154, "step": 2903 }, { "epoch": 0.38854696280438855, "grad_norm": 22.068344116210938, "learning_rate": 6.114530371956115e-07, "loss": 0.6138, "step": 2904 }, { "epoch": 0.3886807599678887, "grad_norm": 13.635701179504395, "learning_rate": 6.113192400321113e-07, "loss": 0.33, "step": 2905 }, { "epoch": 0.3888145571313888, "grad_norm": 11.655815124511719, "learning_rate": 6.111854428686112e-07, "loss": 0.4705, "step": 2906 }, { "epoch": 0.38894835429488894, "grad_norm": 20.441699981689453, "learning_rate": 6.11051645705111e-07, "loss": 0.336, "step": 2907 }, { "epoch": 0.3890821514583891, "grad_norm": 20.26483917236328, "learning_rate": 6.109178485416109e-07, "loss": 0.3896, "step": 2908 }, { "epoch": 0.38921594862188924, "grad_norm": 20.298385620117188, "learning_rate": 6.107840513781107e-07, "loss": 0.3608, "step": 2909 }, { "epoch": 0.38934974578538933, "grad_norm": 15.880058288574219, "learning_rate": 6.106502542146106e-07, "loss": 0.3294, "step": 2910 }, { "epoch": 0.3894835429488895, "grad_norm": 31.657318115234375, "learning_rate": 6.105164570511104e-07, "loss": 0.442, "step": 2911 }, { "epoch": 0.38961734011238963, "grad_norm": 17.310096740722656, "learning_rate": 6.103826598876104e-07, "loss": 0.4814, "step": 2912 }, { "epoch": 0.3897511372758897, "grad_norm": 21.845151901245117, "learning_rate": 6.102488627241103e-07, "loss": 0.5098, "step": 2913 }, { "epoch": 0.3898849344393899, "grad_norm": 46.3972282409668, "learning_rate": 6.101150655606101e-07, "loss": 0.8055, "step": 2914 }, { "epoch": 0.39001873160289, "grad_norm": 16.500648498535156, "learning_rate": 6.099812683971099e-07, "loss": 0.3798, "step": 2915 }, { "epoch": 0.39015252876639017, "grad_norm": 19.828596115112305, "learning_rate": 6.098474712336098e-07, "loss": 0.3851, "step": 2916 }, { "epoch": 0.39028632592989027, "grad_norm": 27.596542358398438, "learning_rate": 6.097136740701097e-07, "loss": 0.4888, "step": 2917 }, { "epoch": 0.3904201230933904, "grad_norm": 17.813112258911133, "learning_rate": 6.095798769066095e-07, "loss": 0.3158, "step": 2918 }, { "epoch": 0.39055392025689056, "grad_norm": 13.849413871765137, "learning_rate": 6.094460797431094e-07, "loss": 0.3988, "step": 2919 }, { "epoch": 0.3906877174203907, "grad_norm": 18.478513717651367, "learning_rate": 6.093122825796093e-07, "loss": 0.393, "step": 2920 }, { "epoch": 0.3908215145838908, "grad_norm": 19.987730026245117, "learning_rate": 6.091784854161092e-07, "loss": 0.3455, "step": 2921 }, { "epoch": 0.39095531174739095, "grad_norm": 20.937223434448242, "learning_rate": 6.090446882526091e-07, "loss": 0.4486, "step": 2922 }, { "epoch": 0.3910891089108911, "grad_norm": 27.038345336914062, "learning_rate": 6.089108910891088e-07, "loss": 0.4774, "step": 2923 }, { "epoch": 0.3912229060743912, "grad_norm": 20.049348831176758, "learning_rate": 6.087770939256087e-07, "loss": 0.4625, "step": 2924 }, { "epoch": 0.39135670323789135, "grad_norm": 13.329590797424316, "learning_rate": 6.086432967621086e-07, "loss": 0.4277, "step": 2925 }, { "epoch": 0.3914905004013915, "grad_norm": 10.815656661987305, "learning_rate": 6.085094995986086e-07, "loss": 0.2735, "step": 2926 }, { "epoch": 0.39162429756489164, "grad_norm": 16.261119842529297, "learning_rate": 6.083757024351084e-07, "loss": 0.3867, "step": 2927 }, { "epoch": 0.39175809472839174, "grad_norm": 15.408010482788086, "learning_rate": 6.082419052716082e-07, "loss": 0.3343, "step": 2928 }, { "epoch": 0.3918918918918919, "grad_norm": 34.5799446105957, "learning_rate": 6.081081081081081e-07, "loss": 0.503, "step": 2929 }, { "epoch": 0.39202568905539203, "grad_norm": 21.049537658691406, "learning_rate": 6.079743109446079e-07, "loss": 0.7253, "step": 2930 }, { "epoch": 0.3921594862188922, "grad_norm": 18.678552627563477, "learning_rate": 6.078405137811078e-07, "loss": 0.4498, "step": 2931 }, { "epoch": 0.3922932833823923, "grad_norm": 18.48225212097168, "learning_rate": 6.077067166176076e-07, "loss": 0.348, "step": 2932 }, { "epoch": 0.3924270805458924, "grad_norm": 13.858044624328613, "learning_rate": 6.075729194541075e-07, "loss": 0.3809, "step": 2933 }, { "epoch": 0.3925608777093926, "grad_norm": 19.397520065307617, "learning_rate": 6.074391222906074e-07, "loss": 0.3766, "step": 2934 }, { "epoch": 0.39269467487289267, "grad_norm": 30.433473587036133, "learning_rate": 6.073053251271073e-07, "loss": 0.3627, "step": 2935 }, { "epoch": 0.3928284720363928, "grad_norm": 14.676492691040039, "learning_rate": 6.071715279636072e-07, "loss": 0.4172, "step": 2936 }, { "epoch": 0.39296226919989297, "grad_norm": 17.488143920898438, "learning_rate": 6.07037730800107e-07, "loss": 0.3215, "step": 2937 }, { "epoch": 0.3930960663633931, "grad_norm": 25.176774978637695, "learning_rate": 6.069039336366068e-07, "loss": 0.3181, "step": 2938 }, { "epoch": 0.3932298635268932, "grad_norm": 17.297792434692383, "learning_rate": 6.067701364731067e-07, "loss": 0.4535, "step": 2939 }, { "epoch": 0.39336366069039336, "grad_norm": 32.91289138793945, "learning_rate": 6.066363393096066e-07, "loss": 0.3261, "step": 2940 }, { "epoch": 0.3934974578538935, "grad_norm": 16.748323440551758, "learning_rate": 6.065025421461066e-07, "loss": 0.4502, "step": 2941 }, { "epoch": 0.39363125501739366, "grad_norm": 26.722091674804688, "learning_rate": 6.063687449826063e-07, "loss": 0.3854, "step": 2942 }, { "epoch": 0.39376505218089375, "grad_norm": 15.83191967010498, "learning_rate": 6.062349478191062e-07, "loss": 0.3934, "step": 2943 }, { "epoch": 0.3938988493443939, "grad_norm": 29.738845825195312, "learning_rate": 6.061011506556061e-07, "loss": 0.5374, "step": 2944 }, { "epoch": 0.39403264650789405, "grad_norm": 39.26590347290039, "learning_rate": 6.05967353492106e-07, "loss": 0.4747, "step": 2945 }, { "epoch": 0.39416644367139414, "grad_norm": 64.25343322753906, "learning_rate": 6.058335563286057e-07, "loss": 0.7585, "step": 2946 }, { "epoch": 0.3943002408348943, "grad_norm": 25.73537254333496, "learning_rate": 6.056997591651056e-07, "loss": 0.4815, "step": 2947 }, { "epoch": 0.39443403799839444, "grad_norm": 12.356478691101074, "learning_rate": 6.055659620016056e-07, "loss": 0.4132, "step": 2948 }, { "epoch": 0.3945678351618946, "grad_norm": 33.28474807739258, "learning_rate": 6.054321648381055e-07, "loss": 0.6072, "step": 2949 }, { "epoch": 0.3947016323253947, "grad_norm": 32.895965576171875, "learning_rate": 6.052983676746053e-07, "loss": 0.8164, "step": 2950 }, { "epoch": 0.39483542948889483, "grad_norm": 22.70209312438965, "learning_rate": 6.051645705111051e-07, "loss": 0.3188, "step": 2951 }, { "epoch": 0.394969226652395, "grad_norm": 20.428857803344727, "learning_rate": 6.05030773347605e-07, "loss": 0.3665, "step": 2952 }, { "epoch": 0.3951030238158951, "grad_norm": 19.301666259765625, "learning_rate": 6.048969761841048e-07, "loss": 0.3123, "step": 2953 }, { "epoch": 0.3952368209793952, "grad_norm": 20.404537200927734, "learning_rate": 6.047631790206047e-07, "loss": 0.4412, "step": 2954 }, { "epoch": 0.39537061814289537, "grad_norm": 29.841848373413086, "learning_rate": 6.046293818571046e-07, "loss": 0.4875, "step": 2955 }, { "epoch": 0.3955044153063955, "grad_norm": 13.686749458312988, "learning_rate": 6.044955846936045e-07, "loss": 0.2608, "step": 2956 }, { "epoch": 0.3956382124698956, "grad_norm": 18.960668563842773, "learning_rate": 6.043617875301043e-07, "loss": 0.4379, "step": 2957 }, { "epoch": 0.39577200963339576, "grad_norm": 14.982998847961426, "learning_rate": 6.042279903666042e-07, "loss": 0.4952, "step": 2958 }, { "epoch": 0.3959058067968959, "grad_norm": 18.048025131225586, "learning_rate": 6.040941932031041e-07, "loss": 0.3326, "step": 2959 }, { "epoch": 0.39603960396039606, "grad_norm": 17.504560470581055, "learning_rate": 6.03960396039604e-07, "loss": 0.5142, "step": 2960 }, { "epoch": 0.39617340112389615, "grad_norm": 13.505754470825195, "learning_rate": 6.038265988761037e-07, "loss": 0.6723, "step": 2961 }, { "epoch": 0.3963071982873963, "grad_norm": 24.217365264892578, "learning_rate": 6.036928017126036e-07, "loss": 0.2296, "step": 2962 }, { "epoch": 0.39644099545089645, "grad_norm": 32.41523742675781, "learning_rate": 6.035590045491036e-07, "loss": 0.4952, "step": 2963 }, { "epoch": 0.3965747926143966, "grad_norm": 15.108489036560059, "learning_rate": 6.034252073856035e-07, "loss": 0.365, "step": 2964 }, { "epoch": 0.3967085897778967, "grad_norm": 27.870882034301758, "learning_rate": 6.032914102221032e-07, "loss": 0.7363, "step": 2965 }, { "epoch": 0.39684238694139684, "grad_norm": 10.844937324523926, "learning_rate": 6.031576130586031e-07, "loss": 0.2682, "step": 2966 }, { "epoch": 0.396976184104897, "grad_norm": 21.80232810974121, "learning_rate": 6.03023815895103e-07, "loss": 0.4349, "step": 2967 }, { "epoch": 0.3971099812683971, "grad_norm": 28.3392391204834, "learning_rate": 6.028900187316029e-07, "loss": 0.4152, "step": 2968 }, { "epoch": 0.39724377843189723, "grad_norm": 27.125076293945312, "learning_rate": 6.027562215681027e-07, "loss": 0.5036, "step": 2969 }, { "epoch": 0.3973775755953974, "grad_norm": 21.289167404174805, "learning_rate": 6.026224244046026e-07, "loss": 0.2916, "step": 2970 }, { "epoch": 0.39751137275889753, "grad_norm": 17.052650451660156, "learning_rate": 6.024886272411025e-07, "loss": 0.3983, "step": 2971 }, { "epoch": 0.3976451699223976, "grad_norm": 18.111248016357422, "learning_rate": 6.023548300776024e-07, "loss": 0.6078, "step": 2972 }, { "epoch": 0.39777896708589777, "grad_norm": 11.94303035736084, "learning_rate": 6.022210329141022e-07, "loss": 0.2819, "step": 2973 }, { "epoch": 0.3979127642493979, "grad_norm": 32.280731201171875, "learning_rate": 6.02087235750602e-07, "loss": 0.5894, "step": 2974 }, { "epoch": 0.39804656141289807, "grad_norm": 29.077669143676758, "learning_rate": 6.019534385871019e-07, "loss": 0.5917, "step": 2975 }, { "epoch": 0.39818035857639816, "grad_norm": 19.16153335571289, "learning_rate": 6.018196414236018e-07, "loss": 0.462, "step": 2976 }, { "epoch": 0.3983141557398983, "grad_norm": 18.188451766967773, "learning_rate": 6.016858442601016e-07, "loss": 0.6027, "step": 2977 }, { "epoch": 0.39844795290339846, "grad_norm": 30.93193244934082, "learning_rate": 6.015520470966016e-07, "loss": 0.4799, "step": 2978 }, { "epoch": 0.39858175006689855, "grad_norm": 34.06515121459961, "learning_rate": 6.014182499331014e-07, "loss": 0.5581, "step": 2979 }, { "epoch": 0.3987155472303987, "grad_norm": 26.635412216186523, "learning_rate": 6.012844527696012e-07, "loss": 0.6194, "step": 2980 }, { "epoch": 0.39884934439389885, "grad_norm": 15.73436450958252, "learning_rate": 6.011506556061011e-07, "loss": 0.3908, "step": 2981 }, { "epoch": 0.398983141557399, "grad_norm": 17.910911560058594, "learning_rate": 6.01016858442601e-07, "loss": 0.3785, "step": 2982 }, { "epoch": 0.3991169387208991, "grad_norm": 17.090023040771484, "learning_rate": 6.008830612791009e-07, "loss": 0.4391, "step": 2983 }, { "epoch": 0.39925073588439924, "grad_norm": 17.411781311035156, "learning_rate": 6.007492641156006e-07, "loss": 0.424, "step": 2984 }, { "epoch": 0.3993845330478994, "grad_norm": 18.373533248901367, "learning_rate": 6.006154669521006e-07, "loss": 0.5249, "step": 2985 }, { "epoch": 0.39951833021139954, "grad_norm": 24.43151092529297, "learning_rate": 6.004816697886005e-07, "loss": 0.5622, "step": 2986 }, { "epoch": 0.39965212737489964, "grad_norm": 42.431663513183594, "learning_rate": 6.003478726251004e-07, "loss": 0.5543, "step": 2987 }, { "epoch": 0.3997859245383998, "grad_norm": 10.631147384643555, "learning_rate": 6.002140754616001e-07, "loss": 0.4382, "step": 2988 }, { "epoch": 0.39991972170189993, "grad_norm": 16.106969833374023, "learning_rate": 6.000802782981e-07, "loss": 0.3956, "step": 2989 }, { "epoch": 0.4000535188654, "grad_norm": 35.817138671875, "learning_rate": 5.999464811345999e-07, "loss": 0.4453, "step": 2990 }, { "epoch": 0.4001873160289002, "grad_norm": 38.77627182006836, "learning_rate": 5.998126839710998e-07, "loss": 0.4919, "step": 2991 }, { "epoch": 0.4003211131924003, "grad_norm": 13.16074275970459, "learning_rate": 5.996788868075997e-07, "loss": 0.5225, "step": 2992 }, { "epoch": 0.4004549103559005, "grad_norm": 22.889745712280273, "learning_rate": 5.995450896440995e-07, "loss": 0.4205, "step": 2993 }, { "epoch": 0.40058870751940057, "grad_norm": 26.118438720703125, "learning_rate": 5.994112924805994e-07, "loss": 0.4383, "step": 2994 }, { "epoch": 0.4007225046829007, "grad_norm": 26.10679054260254, "learning_rate": 5.992774953170993e-07, "loss": 0.4616, "step": 2995 }, { "epoch": 0.40085630184640086, "grad_norm": 20.188148498535156, "learning_rate": 5.991436981535991e-07, "loss": 0.3667, "step": 2996 }, { "epoch": 0.400990099009901, "grad_norm": 19.786598205566406, "learning_rate": 5.99009900990099e-07, "loss": 0.4554, "step": 2997 }, { "epoch": 0.4011238961734011, "grad_norm": 14.819648742675781, "learning_rate": 5.988761038265988e-07, "loss": 0.3735, "step": 2998 }, { "epoch": 0.40125769333690126, "grad_norm": 22.960527420043945, "learning_rate": 5.987423066630987e-07, "loss": 0.4362, "step": 2999 }, { "epoch": 0.4013914905004014, "grad_norm": 24.904943466186523, "learning_rate": 5.986085094995986e-07, "loss": 0.4126, "step": 3000 }, { "epoch": 0.4015252876639015, "grad_norm": 16.535503387451172, "learning_rate": 5.984747123360985e-07, "loss": 0.366, "step": 3001 }, { "epoch": 0.40165908482740165, "grad_norm": 20.71109962463379, "learning_rate": 5.983409151725983e-07, "loss": 0.4574, "step": 3002 }, { "epoch": 0.4017928819909018, "grad_norm": 35.118797302246094, "learning_rate": 5.982071180090982e-07, "loss": 0.4549, "step": 3003 }, { "epoch": 0.40192667915440194, "grad_norm": 18.905179977416992, "learning_rate": 5.98073320845598e-07, "loss": 0.5584, "step": 3004 }, { "epoch": 0.40206047631790204, "grad_norm": 21.017940521240234, "learning_rate": 5.979395236820979e-07, "loss": 0.4668, "step": 3005 }, { "epoch": 0.4021942734814022, "grad_norm": 12.650832176208496, "learning_rate": 5.978057265185978e-07, "loss": 0.3259, "step": 3006 }, { "epoch": 0.40232807064490234, "grad_norm": 45.15636444091797, "learning_rate": 5.976719293550977e-07, "loss": 0.3253, "step": 3007 }, { "epoch": 0.4024618678084025, "grad_norm": 10.717703819274902, "learning_rate": 5.975381321915975e-07, "loss": 0.4919, "step": 3008 }, { "epoch": 0.4025956649719026, "grad_norm": 12.667839050292969, "learning_rate": 5.974043350280974e-07, "loss": 0.3172, "step": 3009 }, { "epoch": 0.4027294621354027, "grad_norm": 13.797382354736328, "learning_rate": 5.972705378645973e-07, "loss": 0.504, "step": 3010 }, { "epoch": 0.4028632592989029, "grad_norm": 16.02069664001465, "learning_rate": 5.97136740701097e-07, "loss": 0.3613, "step": 3011 }, { "epoch": 0.40299705646240297, "grad_norm": 23.463905334472656, "learning_rate": 5.970029435375969e-07, "loss": 0.3416, "step": 3012 }, { "epoch": 0.4031308536259031, "grad_norm": 22.466035842895508, "learning_rate": 5.968691463740968e-07, "loss": 0.4345, "step": 3013 }, { "epoch": 0.40326465078940327, "grad_norm": 14.068483352661133, "learning_rate": 5.967353492105968e-07, "loss": 0.3448, "step": 3014 }, { "epoch": 0.4033984479529034, "grad_norm": 23.339481353759766, "learning_rate": 5.966015520470966e-07, "loss": 0.2635, "step": 3015 }, { "epoch": 0.4035322451164035, "grad_norm": 20.784860610961914, "learning_rate": 5.964677548835964e-07, "loss": 0.4862, "step": 3016 }, { "epoch": 0.40366604227990366, "grad_norm": 12.763715744018555, "learning_rate": 5.963339577200963e-07, "loss": 0.2724, "step": 3017 }, { "epoch": 0.4037998394434038, "grad_norm": 25.051733016967773, "learning_rate": 5.962001605565962e-07, "loss": 0.5538, "step": 3018 }, { "epoch": 0.40393363660690396, "grad_norm": 30.0716609954834, "learning_rate": 5.96066363393096e-07, "loss": 0.5023, "step": 3019 }, { "epoch": 0.40406743377040405, "grad_norm": 41.490482330322266, "learning_rate": 5.959325662295959e-07, "loss": 0.671, "step": 3020 }, { "epoch": 0.4042012309339042, "grad_norm": 15.402030944824219, "learning_rate": 5.957987690660957e-07, "loss": 0.3698, "step": 3021 }, { "epoch": 0.40433502809740435, "grad_norm": 35.81051254272461, "learning_rate": 5.956649719025957e-07, "loss": 0.5882, "step": 3022 }, { "epoch": 0.4044688252609045, "grad_norm": 35.35259246826172, "learning_rate": 5.955311747390955e-07, "loss": 0.7197, "step": 3023 }, { "epoch": 0.4046026224244046, "grad_norm": 24.54574203491211, "learning_rate": 5.953973775755954e-07, "loss": 0.4972, "step": 3024 }, { "epoch": 0.40473641958790474, "grad_norm": 34.97601318359375, "learning_rate": 5.952635804120953e-07, "loss": 0.3994, "step": 3025 }, { "epoch": 0.4048702167514049, "grad_norm": 29.079938888549805, "learning_rate": 5.951297832485951e-07, "loss": 0.6913, "step": 3026 }, { "epoch": 0.405004013914905, "grad_norm": 13.381443977355957, "learning_rate": 5.949959860850949e-07, "loss": 0.3432, "step": 3027 }, { "epoch": 0.40513781107840513, "grad_norm": 15.947858810424805, "learning_rate": 5.948621889215948e-07, "loss": 0.5691, "step": 3028 }, { "epoch": 0.4052716082419053, "grad_norm": 24.946523666381836, "learning_rate": 5.947283917580948e-07, "loss": 0.5335, "step": 3029 }, { "epoch": 0.40540540540540543, "grad_norm": 23.000621795654297, "learning_rate": 5.945945945945947e-07, "loss": 0.4752, "step": 3030 }, { "epoch": 0.4055392025689055, "grad_norm": 14.507658004760742, "learning_rate": 5.944607974310944e-07, "loss": 0.5986, "step": 3031 }, { "epoch": 0.40567299973240567, "grad_norm": 44.06635665893555, "learning_rate": 5.943270002675943e-07, "loss": 0.4283, "step": 3032 }, { "epoch": 0.4058067968959058, "grad_norm": 21.380998611450195, "learning_rate": 5.941932031040942e-07, "loss": 0.4853, "step": 3033 }, { "epoch": 0.40594059405940597, "grad_norm": 21.70125961303711, "learning_rate": 5.94059405940594e-07, "loss": 0.4856, "step": 3034 }, { "epoch": 0.40607439122290606, "grad_norm": 17.456016540527344, "learning_rate": 5.939256087770938e-07, "loss": 0.3873, "step": 3035 }, { "epoch": 0.4062081883864062, "grad_norm": 55.42986297607422, "learning_rate": 5.937918116135938e-07, "loss": 0.3182, "step": 3036 }, { "epoch": 0.40634198554990636, "grad_norm": 20.404300689697266, "learning_rate": 5.936580144500937e-07, "loss": 0.7995, "step": 3037 }, { "epoch": 0.40647578271340645, "grad_norm": 20.173734664916992, "learning_rate": 5.935242172865935e-07, "loss": 0.3911, "step": 3038 }, { "epoch": 0.4066095798769066, "grad_norm": 39.470218658447266, "learning_rate": 5.933904201230934e-07, "loss": 0.4477, "step": 3039 }, { "epoch": 0.40674337704040675, "grad_norm": 18.17654037475586, "learning_rate": 5.932566229595932e-07, "loss": 0.468, "step": 3040 }, { "epoch": 0.4068771742039069, "grad_norm": 14.436890602111816, "learning_rate": 5.931228257960931e-07, "loss": 0.493, "step": 3041 }, { "epoch": 0.407010971367407, "grad_norm": 14.22456169128418, "learning_rate": 5.929890286325929e-07, "loss": 0.34, "step": 3042 }, { "epoch": 0.40714476853090714, "grad_norm": 22.197059631347656, "learning_rate": 5.928552314690928e-07, "loss": 0.4622, "step": 3043 }, { "epoch": 0.4072785656944073, "grad_norm": 32.07297897338867, "learning_rate": 5.927214343055927e-07, "loss": 0.2392, "step": 3044 }, { "epoch": 0.40741236285790744, "grad_norm": 13.119222640991211, "learning_rate": 5.925876371420926e-07, "loss": 0.4931, "step": 3045 }, { "epoch": 0.40754616002140753, "grad_norm": 16.202470779418945, "learning_rate": 5.924538399785924e-07, "loss": 0.3513, "step": 3046 }, { "epoch": 0.4076799571849077, "grad_norm": 20.004680633544922, "learning_rate": 5.923200428150923e-07, "loss": 0.4927, "step": 3047 }, { "epoch": 0.40781375434840783, "grad_norm": 17.80527114868164, "learning_rate": 5.921862456515922e-07, "loss": 0.457, "step": 3048 }, { "epoch": 0.4079475515119079, "grad_norm": 37.25132751464844, "learning_rate": 5.92052448488092e-07, "loss": 0.4491, "step": 3049 }, { "epoch": 0.4080813486754081, "grad_norm": 10.995736122131348, "learning_rate": 5.919186513245918e-07, "loss": 0.2852, "step": 3050 }, { "epoch": 0.4082151458389082, "grad_norm": 32.15553665161133, "learning_rate": 5.917848541610918e-07, "loss": 0.4913, "step": 3051 }, { "epoch": 0.40834894300240837, "grad_norm": 50.59196090698242, "learning_rate": 5.916510569975917e-07, "loss": 0.6164, "step": 3052 }, { "epoch": 0.40848274016590846, "grad_norm": 13.391156196594238, "learning_rate": 5.915172598340916e-07, "loss": 0.4559, "step": 3053 }, { "epoch": 0.4086165373294086, "grad_norm": 24.285953521728516, "learning_rate": 5.913834626705913e-07, "loss": 0.4698, "step": 3054 }, { "epoch": 0.40875033449290876, "grad_norm": 16.97608184814453, "learning_rate": 5.912496655070912e-07, "loss": 0.3078, "step": 3055 }, { "epoch": 0.4088841316564089, "grad_norm": 15.03139877319336, "learning_rate": 5.911158683435911e-07, "loss": 0.3283, "step": 3056 }, { "epoch": 0.409017928819909, "grad_norm": 26.59393882751465, "learning_rate": 5.909820711800909e-07, "loss": 0.5745, "step": 3057 }, { "epoch": 0.40915172598340915, "grad_norm": 22.57600975036621, "learning_rate": 5.908482740165908e-07, "loss": 0.5031, "step": 3058 }, { "epoch": 0.4092855231469093, "grad_norm": 16.130216598510742, "learning_rate": 5.907144768530907e-07, "loss": 0.3329, "step": 3059 }, { "epoch": 0.4094193203104094, "grad_norm": 20.271203994750977, "learning_rate": 5.905806796895906e-07, "loss": 0.4694, "step": 3060 }, { "epoch": 0.40955311747390954, "grad_norm": 14.523141860961914, "learning_rate": 5.904468825260904e-07, "loss": 0.3571, "step": 3061 }, { "epoch": 0.4096869146374097, "grad_norm": 42.99807357788086, "learning_rate": 5.903130853625903e-07, "loss": 0.3462, "step": 3062 }, { "epoch": 0.40982071180090984, "grad_norm": 49.09715270996094, "learning_rate": 5.901792881990901e-07, "loss": 0.4294, "step": 3063 }, { "epoch": 0.40995450896440994, "grad_norm": 44.27449035644531, "learning_rate": 5.9004549103559e-07, "loss": 0.5211, "step": 3064 }, { "epoch": 0.4100883061279101, "grad_norm": 28.67397689819336, "learning_rate": 5.899116938720898e-07, "loss": 0.519, "step": 3065 }, { "epoch": 0.41022210329141023, "grad_norm": 32.495460510253906, "learning_rate": 5.897778967085898e-07, "loss": 0.3125, "step": 3066 }, { "epoch": 0.4103559004549104, "grad_norm": 17.881864547729492, "learning_rate": 5.896440995450897e-07, "loss": 0.4355, "step": 3067 }, { "epoch": 0.4104896976184105, "grad_norm": 25.725826263427734, "learning_rate": 5.895103023815895e-07, "loss": 0.455, "step": 3068 }, { "epoch": 0.4106234947819106, "grad_norm": 33.897239685058594, "learning_rate": 5.893765052180893e-07, "loss": 0.5087, "step": 3069 }, { "epoch": 0.4107572919454108, "grad_norm": 21.701913833618164, "learning_rate": 5.892427080545892e-07, "loss": 0.5103, "step": 3070 }, { "epoch": 0.41089108910891087, "grad_norm": 27.802959442138672, "learning_rate": 5.891089108910891e-07, "loss": 0.3674, "step": 3071 }, { "epoch": 0.411024886272411, "grad_norm": 21.73274040222168, "learning_rate": 5.889751137275889e-07, "loss": 0.3379, "step": 3072 }, { "epoch": 0.41115868343591117, "grad_norm": 19.098636627197266, "learning_rate": 5.888413165640888e-07, "loss": 0.273, "step": 3073 }, { "epoch": 0.4112924805994113, "grad_norm": 21.41251564025879, "learning_rate": 5.887075194005887e-07, "loss": 0.3165, "step": 3074 }, { "epoch": 0.4114262777629114, "grad_norm": 42.140380859375, "learning_rate": 5.885737222370886e-07, "loss": 0.6091, "step": 3075 }, { "epoch": 0.41156007492641156, "grad_norm": 32.52119827270508, "learning_rate": 5.884399250735885e-07, "loss": 0.6062, "step": 3076 }, { "epoch": 0.4116938720899117, "grad_norm": 21.447599411010742, "learning_rate": 5.883061279100882e-07, "loss": 0.5348, "step": 3077 }, { "epoch": 0.41182766925341185, "grad_norm": 17.55247688293457, "learning_rate": 5.881723307465881e-07, "loss": 0.4884, "step": 3078 }, { "epoch": 0.41196146641691195, "grad_norm": 23.380126953125, "learning_rate": 5.88038533583088e-07, "loss": 0.3576, "step": 3079 }, { "epoch": 0.4120952635804121, "grad_norm": 33.206729888916016, "learning_rate": 5.87904736419588e-07, "loss": 0.5918, "step": 3080 }, { "epoch": 0.41222906074391225, "grad_norm": 28.057151794433594, "learning_rate": 5.877709392560878e-07, "loss": 0.3735, "step": 3081 }, { "epoch": 0.41236285790741234, "grad_norm": 15.977526664733887, "learning_rate": 5.876371420925876e-07, "loss": 0.4177, "step": 3082 }, { "epoch": 0.4124966550709125, "grad_norm": 32.166473388671875, "learning_rate": 5.875033449290875e-07, "loss": 0.6116, "step": 3083 }, { "epoch": 0.41263045223441264, "grad_norm": 17.243608474731445, "learning_rate": 5.873695477655873e-07, "loss": 0.4375, "step": 3084 }, { "epoch": 0.4127642493979128, "grad_norm": 29.912199020385742, "learning_rate": 5.872357506020872e-07, "loss": 0.7598, "step": 3085 }, { "epoch": 0.4128980465614129, "grad_norm": 17.65205192565918, "learning_rate": 5.87101953438587e-07, "loss": 0.545, "step": 3086 }, { "epoch": 0.41303184372491303, "grad_norm": 24.464262008666992, "learning_rate": 5.869681562750869e-07, "loss": 0.4471, "step": 3087 }, { "epoch": 0.4131656408884132, "grad_norm": 17.946447372436523, "learning_rate": 5.868343591115868e-07, "loss": 0.5601, "step": 3088 }, { "epoch": 0.4132994380519133, "grad_norm": 15.598030090332031, "learning_rate": 5.867005619480867e-07, "loss": 0.41, "step": 3089 }, { "epoch": 0.4134332352154134, "grad_norm": 25.24568748474121, "learning_rate": 5.865667647845866e-07, "loss": 0.3652, "step": 3090 }, { "epoch": 0.41356703237891357, "grad_norm": 29.020761489868164, "learning_rate": 5.864329676210864e-07, "loss": 0.5151, "step": 3091 }, { "epoch": 0.4137008295424137, "grad_norm": 24.75641632080078, "learning_rate": 5.862991704575862e-07, "loss": 0.4171, "step": 3092 }, { "epoch": 0.4138346267059138, "grad_norm": 13.983478546142578, "learning_rate": 5.861653732940861e-07, "loss": 0.4305, "step": 3093 }, { "epoch": 0.41396842386941396, "grad_norm": 22.369205474853516, "learning_rate": 5.86031576130586e-07, "loss": 0.5359, "step": 3094 }, { "epoch": 0.4141022210329141, "grad_norm": 17.822481155395508, "learning_rate": 5.85897778967086e-07, "loss": 0.5535, "step": 3095 }, { "epoch": 0.41423601819641426, "grad_norm": 16.49127960205078, "learning_rate": 5.857639818035857e-07, "loss": 0.5275, "step": 3096 }, { "epoch": 0.41436981535991435, "grad_norm": 33.265933990478516, "learning_rate": 5.856301846400856e-07, "loss": 0.3921, "step": 3097 }, { "epoch": 0.4145036125234145, "grad_norm": 35.605873107910156, "learning_rate": 5.854963874765855e-07, "loss": 0.4103, "step": 3098 }, { "epoch": 0.41463740968691465, "grad_norm": 44.111202239990234, "learning_rate": 5.853625903130854e-07, "loss": 0.6189, "step": 3099 }, { "epoch": 0.4147712068504148, "grad_norm": 36.59880828857422, "learning_rate": 5.852287931495851e-07, "loss": 0.2612, "step": 3100 }, { "epoch": 0.4149050040139149, "grad_norm": 21.584840774536133, "learning_rate": 5.85094995986085e-07, "loss": 0.4913, "step": 3101 }, { "epoch": 0.41503880117741504, "grad_norm": 12.263314247131348, "learning_rate": 5.84961198822585e-07, "loss": 0.4288, "step": 3102 }, { "epoch": 0.4151725983409152, "grad_norm": 11.721925735473633, "learning_rate": 5.848274016590849e-07, "loss": 0.2536, "step": 3103 }, { "epoch": 0.4153063955044153, "grad_norm": 27.258329391479492, "learning_rate": 5.846936044955847e-07, "loss": 0.3217, "step": 3104 }, { "epoch": 0.41544019266791543, "grad_norm": 19.35981559753418, "learning_rate": 5.845598073320845e-07, "loss": 0.2872, "step": 3105 }, { "epoch": 0.4155739898314156, "grad_norm": 21.127674102783203, "learning_rate": 5.844260101685844e-07, "loss": 0.4555, "step": 3106 }, { "epoch": 0.41570778699491573, "grad_norm": 16.01763343811035, "learning_rate": 5.842922130050843e-07, "loss": 0.382, "step": 3107 }, { "epoch": 0.4158415841584158, "grad_norm": 12.684490203857422, "learning_rate": 5.841584158415841e-07, "loss": 0.3566, "step": 3108 }, { "epoch": 0.41597538132191597, "grad_norm": 26.204626083374023, "learning_rate": 5.84024618678084e-07, "loss": 0.4148, "step": 3109 }, { "epoch": 0.4161091784854161, "grad_norm": 23.750900268554688, "learning_rate": 5.838908215145839e-07, "loss": 0.1877, "step": 3110 }, { "epoch": 0.41624297564891627, "grad_norm": 12.5011568069458, "learning_rate": 5.837570243510838e-07, "loss": 0.4079, "step": 3111 }, { "epoch": 0.41637677281241636, "grad_norm": 25.17943000793457, "learning_rate": 5.836232271875836e-07, "loss": 0.4745, "step": 3112 }, { "epoch": 0.4165105699759165, "grad_norm": 33.282493591308594, "learning_rate": 5.834894300240835e-07, "loss": 0.4196, "step": 3113 }, { "epoch": 0.41664436713941666, "grad_norm": 22.055917739868164, "learning_rate": 5.833556328605833e-07, "loss": 0.3336, "step": 3114 }, { "epoch": 0.41677816430291675, "grad_norm": 29.553489685058594, "learning_rate": 5.832218356970831e-07, "loss": 0.3503, "step": 3115 }, { "epoch": 0.4169119614664169, "grad_norm": 26.592050552368164, "learning_rate": 5.83088038533583e-07, "loss": 0.4791, "step": 3116 }, { "epoch": 0.41704575862991705, "grad_norm": 22.717676162719727, "learning_rate": 5.82954241370083e-07, "loss": 0.3964, "step": 3117 }, { "epoch": 0.4171795557934172, "grad_norm": 10.677505493164062, "learning_rate": 5.828204442065829e-07, "loss": 0.3348, "step": 3118 }, { "epoch": 0.4173133529569173, "grad_norm": 59.3098030090332, "learning_rate": 5.826866470430826e-07, "loss": 0.5888, "step": 3119 }, { "epoch": 0.41744715012041744, "grad_norm": 26.317468643188477, "learning_rate": 5.825528498795825e-07, "loss": 0.4173, "step": 3120 }, { "epoch": 0.4175809472839176, "grad_norm": 31.669349670410156, "learning_rate": 5.824190527160824e-07, "loss": 0.6187, "step": 3121 }, { "epoch": 0.41771474444741774, "grad_norm": 11.82263469696045, "learning_rate": 5.822852555525823e-07, "loss": 0.3166, "step": 3122 }, { "epoch": 0.41784854161091783, "grad_norm": 29.04935646057129, "learning_rate": 5.82151458389082e-07, "loss": 0.4084, "step": 3123 }, { "epoch": 0.417982338774418, "grad_norm": 14.782816886901855, "learning_rate": 5.82017661225582e-07, "loss": 0.4676, "step": 3124 }, { "epoch": 0.41811613593791813, "grad_norm": 31.397424697875977, "learning_rate": 5.818838640620819e-07, "loss": 0.3732, "step": 3125 }, { "epoch": 0.4182499331014182, "grad_norm": 12.398863792419434, "learning_rate": 5.817500668985818e-07, "loss": 0.2866, "step": 3126 }, { "epoch": 0.4183837302649184, "grad_norm": 34.60478591918945, "learning_rate": 5.816162697350816e-07, "loss": 0.6009, "step": 3127 }, { "epoch": 0.4185175274284185, "grad_norm": 27.41828727722168, "learning_rate": 5.814824725715814e-07, "loss": 0.2478, "step": 3128 }, { "epoch": 0.41865132459191867, "grad_norm": 24.52680206298828, "learning_rate": 5.813486754080813e-07, "loss": 0.3556, "step": 3129 }, { "epoch": 0.41878512175541877, "grad_norm": 22.825878143310547, "learning_rate": 5.812148782445812e-07, "loss": 0.4209, "step": 3130 }, { "epoch": 0.4189189189189189, "grad_norm": 27.741497039794922, "learning_rate": 5.81081081081081e-07, "loss": 0.5544, "step": 3131 }, { "epoch": 0.41905271608241906, "grad_norm": 26.652645111083984, "learning_rate": 5.80947283917581e-07, "loss": 0.4044, "step": 3132 }, { "epoch": 0.4191865132459192, "grad_norm": 22.34974479675293, "learning_rate": 5.808134867540808e-07, "loss": 0.5739, "step": 3133 }, { "epoch": 0.4193203104094193, "grad_norm": 23.799280166625977, "learning_rate": 5.806796895905807e-07, "loss": 0.4504, "step": 3134 }, { "epoch": 0.41945410757291945, "grad_norm": 23.24643325805664, "learning_rate": 5.805458924270805e-07, "loss": 0.5784, "step": 3135 }, { "epoch": 0.4195879047364196, "grad_norm": 15.308497428894043, "learning_rate": 5.804120952635804e-07, "loss": 0.3782, "step": 3136 }, { "epoch": 0.4197217018999197, "grad_norm": 20.59400177001953, "learning_rate": 5.802782981000803e-07, "loss": 0.4118, "step": 3137 }, { "epoch": 0.41985549906341985, "grad_norm": 27.415842056274414, "learning_rate": 5.8014450093658e-07, "loss": 0.359, "step": 3138 }, { "epoch": 0.41998929622692, "grad_norm": 20.444438934326172, "learning_rate": 5.8001070377308e-07, "loss": 0.4574, "step": 3139 }, { "epoch": 0.42012309339042014, "grad_norm": 47.98008346557617, "learning_rate": 5.798769066095799e-07, "loss": 0.5854, "step": 3140 }, { "epoch": 0.42025689055392024, "grad_norm": 38.704566955566406, "learning_rate": 5.797431094460798e-07, "loss": 0.5822, "step": 3141 }, { "epoch": 0.4203906877174204, "grad_norm": 22.03931427001953, "learning_rate": 5.796093122825795e-07, "loss": 0.3906, "step": 3142 }, { "epoch": 0.42052448488092053, "grad_norm": 20.742992401123047, "learning_rate": 5.794755151190794e-07, "loss": 0.6259, "step": 3143 }, { "epoch": 0.4206582820444207, "grad_norm": 15.4249849319458, "learning_rate": 5.793417179555793e-07, "loss": 0.5218, "step": 3144 }, { "epoch": 0.4207920792079208, "grad_norm": 30.330463409423828, "learning_rate": 5.792079207920792e-07, "loss": 0.4021, "step": 3145 }, { "epoch": 0.4209258763714209, "grad_norm": 17.749055862426758, "learning_rate": 5.790741236285791e-07, "loss": 0.3559, "step": 3146 }, { "epoch": 0.4210596735349211, "grad_norm": 33.004878997802734, "learning_rate": 5.789403264650789e-07, "loss": 0.5386, "step": 3147 }, { "epoch": 0.42119347069842117, "grad_norm": 20.20098876953125, "learning_rate": 5.788065293015788e-07, "loss": 0.3586, "step": 3148 }, { "epoch": 0.4213272678619213, "grad_norm": 26.834280014038086, "learning_rate": 5.786727321380787e-07, "loss": 0.4167, "step": 3149 }, { "epoch": 0.42146106502542147, "grad_norm": 27.056968688964844, "learning_rate": 5.785389349745785e-07, "loss": 0.4222, "step": 3150 }, { "epoch": 0.4215948621889216, "grad_norm": 17.46430778503418, "learning_rate": 5.784051378110783e-07, "loss": 0.5236, "step": 3151 }, { "epoch": 0.4217286593524217, "grad_norm": 18.580183029174805, "learning_rate": 5.782713406475782e-07, "loss": 0.3252, "step": 3152 }, { "epoch": 0.42186245651592186, "grad_norm": 16.21845054626465, "learning_rate": 5.781375434840781e-07, "loss": 0.3018, "step": 3153 }, { "epoch": 0.421996253679422, "grad_norm": 49.28647994995117, "learning_rate": 5.78003746320578e-07, "loss": 0.4795, "step": 3154 }, { "epoch": 0.42213005084292216, "grad_norm": 29.984882354736328, "learning_rate": 5.778699491570779e-07, "loss": 0.5396, "step": 3155 }, { "epoch": 0.42226384800642225, "grad_norm": 28.176719665527344, "learning_rate": 5.777361519935777e-07, "loss": 0.4773, "step": 3156 }, { "epoch": 0.4223976451699224, "grad_norm": 13.967986106872559, "learning_rate": 5.776023548300776e-07, "loss": 0.4469, "step": 3157 }, { "epoch": 0.42253144233342255, "grad_norm": 30.529823303222656, "learning_rate": 5.774685576665774e-07, "loss": 0.6104, "step": 3158 }, { "epoch": 0.42266523949692264, "grad_norm": 23.454191207885742, "learning_rate": 5.773347605030773e-07, "loss": 0.5466, "step": 3159 }, { "epoch": 0.4227990366604228, "grad_norm": 18.134395599365234, "learning_rate": 5.772009633395772e-07, "loss": 0.645, "step": 3160 }, { "epoch": 0.42293283382392294, "grad_norm": 16.4210205078125, "learning_rate": 5.770671661760771e-07, "loss": 0.5129, "step": 3161 }, { "epoch": 0.4230666309874231, "grad_norm": 43.29154586791992, "learning_rate": 5.769333690125769e-07, "loss": 0.5336, "step": 3162 }, { "epoch": 0.4232004281509232, "grad_norm": 21.1964111328125, "learning_rate": 5.767995718490768e-07, "loss": 0.4053, "step": 3163 }, { "epoch": 0.42333422531442333, "grad_norm": 39.89664840698242, "learning_rate": 5.766657746855767e-07, "loss": 0.3694, "step": 3164 }, { "epoch": 0.4234680224779235, "grad_norm": 33.701507568359375, "learning_rate": 5.765319775220764e-07, "loss": 0.3446, "step": 3165 }, { "epoch": 0.4236018196414236, "grad_norm": 14.88020133972168, "learning_rate": 5.763981803585763e-07, "loss": 0.4926, "step": 3166 }, { "epoch": 0.4237356168049237, "grad_norm": 15.046886444091797, "learning_rate": 5.762643831950762e-07, "loss": 0.5238, "step": 3167 }, { "epoch": 0.42386941396842387, "grad_norm": 12.825372695922852, "learning_rate": 5.761305860315761e-07, "loss": 0.6607, "step": 3168 }, { "epoch": 0.424003211131924, "grad_norm": 28.132665634155273, "learning_rate": 5.75996788868076e-07, "loss": 0.6115, "step": 3169 }, { "epoch": 0.4241370082954241, "grad_norm": 28.167224884033203, "learning_rate": 5.758629917045758e-07, "loss": 0.3, "step": 3170 }, { "epoch": 0.42427080545892426, "grad_norm": 18.744958877563477, "learning_rate": 5.757291945410757e-07, "loss": 0.6202, "step": 3171 }, { "epoch": 0.4244046026224244, "grad_norm": 15.692273139953613, "learning_rate": 5.755953973775756e-07, "loss": 0.4824, "step": 3172 }, { "epoch": 0.42453839978592456, "grad_norm": 13.814704895019531, "learning_rate": 5.754616002140754e-07, "loss": 0.5035, "step": 3173 }, { "epoch": 0.42467219694942465, "grad_norm": 30.750877380371094, "learning_rate": 5.753278030505753e-07, "loss": 0.5121, "step": 3174 }, { "epoch": 0.4248059941129248, "grad_norm": 21.89387321472168, "learning_rate": 5.751940058870751e-07, "loss": 0.3669, "step": 3175 }, { "epoch": 0.42493979127642495, "grad_norm": 19.01833724975586, "learning_rate": 5.750602087235751e-07, "loss": 0.4548, "step": 3176 }, { "epoch": 0.4250735884399251, "grad_norm": 17.754268646240234, "learning_rate": 5.749264115600749e-07, "loss": 0.468, "step": 3177 }, { "epoch": 0.4252073856034252, "grad_norm": 24.86610221862793, "learning_rate": 5.747926143965748e-07, "loss": 0.5483, "step": 3178 }, { "epoch": 0.42534118276692534, "grad_norm": 20.229223251342773, "learning_rate": 5.746588172330747e-07, "loss": 0.3322, "step": 3179 }, { "epoch": 0.4254749799304255, "grad_norm": 16.3660888671875, "learning_rate": 5.745250200695745e-07, "loss": 0.5041, "step": 3180 }, { "epoch": 0.4256087770939256, "grad_norm": 11.691067695617676, "learning_rate": 5.743912229060743e-07, "loss": 0.4428, "step": 3181 }, { "epoch": 0.42574257425742573, "grad_norm": 19.040699005126953, "learning_rate": 5.742574257425742e-07, "loss": 0.4432, "step": 3182 }, { "epoch": 0.4258763714209259, "grad_norm": 21.703824996948242, "learning_rate": 5.741236285790742e-07, "loss": 0.4621, "step": 3183 }, { "epoch": 0.42601016858442603, "grad_norm": 25.6138858795166, "learning_rate": 5.73989831415574e-07, "loss": 0.491, "step": 3184 }, { "epoch": 0.4261439657479261, "grad_norm": 16.802927017211914, "learning_rate": 5.738560342520738e-07, "loss": 0.41, "step": 3185 }, { "epoch": 0.4262777629114263, "grad_norm": 12.069120407104492, "learning_rate": 5.737222370885737e-07, "loss": 0.3679, "step": 3186 }, { "epoch": 0.4264115600749264, "grad_norm": 13.586202621459961, "learning_rate": 5.735884399250736e-07, "loss": 0.3583, "step": 3187 }, { "epoch": 0.42654535723842657, "grad_norm": 19.6392765045166, "learning_rate": 5.734546427615734e-07, "loss": 0.4163, "step": 3188 }, { "epoch": 0.42667915440192666, "grad_norm": 21.479711532592773, "learning_rate": 5.733208455980732e-07, "loss": 0.3645, "step": 3189 }, { "epoch": 0.4268129515654268, "grad_norm": 20.448572158813477, "learning_rate": 5.731870484345731e-07, "loss": 0.2939, "step": 3190 }, { "epoch": 0.42694674872892696, "grad_norm": 16.01723861694336, "learning_rate": 5.730532512710731e-07, "loss": 0.4788, "step": 3191 }, { "epoch": 0.42708054589242705, "grad_norm": 15.397117614746094, "learning_rate": 5.729194541075729e-07, "loss": 0.5296, "step": 3192 }, { "epoch": 0.4272143430559272, "grad_norm": 18.947166442871094, "learning_rate": 5.727856569440727e-07, "loss": 0.5644, "step": 3193 }, { "epoch": 0.42734814021942735, "grad_norm": 31.11532211303711, "learning_rate": 5.726518597805726e-07, "loss": 0.6094, "step": 3194 }, { "epoch": 0.4274819373829275, "grad_norm": 20.278186798095703, "learning_rate": 5.725180626170725e-07, "loss": 0.2877, "step": 3195 }, { "epoch": 0.4276157345464276, "grad_norm": 34.093421936035156, "learning_rate": 5.723842654535723e-07, "loss": 0.5575, "step": 3196 }, { "epoch": 0.42774953170992774, "grad_norm": 16.195781707763672, "learning_rate": 5.722504682900722e-07, "loss": 0.6231, "step": 3197 }, { "epoch": 0.4278833288734279, "grad_norm": 17.437908172607422, "learning_rate": 5.721166711265721e-07, "loss": 0.6043, "step": 3198 }, { "epoch": 0.42801712603692804, "grad_norm": 26.218711853027344, "learning_rate": 5.71982873963072e-07, "loss": 0.5298, "step": 3199 }, { "epoch": 0.42815092320042814, "grad_norm": 17.248626708984375, "learning_rate": 5.718490767995718e-07, "loss": 0.4366, "step": 3200 }, { "epoch": 0.4282847203639283, "grad_norm": 17.14711570739746, "learning_rate": 5.717152796360717e-07, "loss": 0.462, "step": 3201 }, { "epoch": 0.42841851752742843, "grad_norm": 17.39344024658203, "learning_rate": 5.715814824725716e-07, "loss": 0.3778, "step": 3202 }, { "epoch": 0.4285523146909285, "grad_norm": 17.21553611755371, "learning_rate": 5.714476853090714e-07, "loss": 0.4039, "step": 3203 }, { "epoch": 0.4286861118544287, "grad_norm": 29.68196678161621, "learning_rate": 5.713138881455712e-07, "loss": 0.518, "step": 3204 }, { "epoch": 0.4288199090179288, "grad_norm": 11.241655349731445, "learning_rate": 5.711800909820712e-07, "loss": 0.3374, "step": 3205 }, { "epoch": 0.428953706181429, "grad_norm": 15.38609504699707, "learning_rate": 5.710462938185711e-07, "loss": 0.3875, "step": 3206 }, { "epoch": 0.42908750334492907, "grad_norm": 13.010043144226074, "learning_rate": 5.70912496655071e-07, "loss": 0.3504, "step": 3207 }, { "epoch": 0.4292213005084292, "grad_norm": 31.899396896362305, "learning_rate": 5.707786994915707e-07, "loss": 0.3978, "step": 3208 }, { "epoch": 0.42935509767192936, "grad_norm": 12.225510597229004, "learning_rate": 5.706449023280706e-07, "loss": 0.3528, "step": 3209 }, { "epoch": 0.4294888948354295, "grad_norm": 29.932544708251953, "learning_rate": 5.705111051645705e-07, "loss": 0.4806, "step": 3210 }, { "epoch": 0.4296226919989296, "grad_norm": 16.156749725341797, "learning_rate": 5.703773080010704e-07, "loss": 0.4576, "step": 3211 }, { "epoch": 0.42975648916242976, "grad_norm": 16.393571853637695, "learning_rate": 5.702435108375701e-07, "loss": 0.517, "step": 3212 }, { "epoch": 0.4298902863259299, "grad_norm": 21.96512222290039, "learning_rate": 5.701097136740701e-07, "loss": 0.2003, "step": 3213 }, { "epoch": 0.43002408348943, "grad_norm": 38.31145477294922, "learning_rate": 5.6997591651057e-07, "loss": 0.6283, "step": 3214 }, { "epoch": 0.43015788065293015, "grad_norm": 25.074256896972656, "learning_rate": 5.698421193470698e-07, "loss": 0.4097, "step": 3215 }, { "epoch": 0.4302916778164303, "grad_norm": 24.448223114013672, "learning_rate": 5.697083221835697e-07, "loss": 0.3999, "step": 3216 }, { "epoch": 0.43042547497993044, "grad_norm": 24.25440216064453, "learning_rate": 5.695745250200695e-07, "loss": 0.5155, "step": 3217 }, { "epoch": 0.43055927214343054, "grad_norm": 25.719728469848633, "learning_rate": 5.694407278565694e-07, "loss": 0.3028, "step": 3218 }, { "epoch": 0.4306930693069307, "grad_norm": 20.92743682861328, "learning_rate": 5.693069306930692e-07, "loss": 0.3327, "step": 3219 }, { "epoch": 0.43082686647043084, "grad_norm": 25.926511764526367, "learning_rate": 5.691731335295692e-07, "loss": 0.4606, "step": 3220 }, { "epoch": 0.430960663633931, "grad_norm": 22.841476440429688, "learning_rate": 5.69039336366069e-07, "loss": 0.4556, "step": 3221 }, { "epoch": 0.4310944607974311, "grad_norm": 24.960657119750977, "learning_rate": 5.689055392025689e-07, "loss": 0.412, "step": 3222 }, { "epoch": 0.4312282579609312, "grad_norm": 18.93363380432129, "learning_rate": 5.687717420390687e-07, "loss": 0.4796, "step": 3223 }, { "epoch": 0.4313620551244314, "grad_norm": 27.2554931640625, "learning_rate": 5.686379448755686e-07, "loss": 0.6443, "step": 3224 }, { "epoch": 0.43149585228793147, "grad_norm": 31.000329971313477, "learning_rate": 5.685041477120685e-07, "loss": 0.7034, "step": 3225 }, { "epoch": 0.4316296494514316, "grad_norm": 17.041597366333008, "learning_rate": 5.683703505485683e-07, "loss": 0.374, "step": 3226 }, { "epoch": 0.43176344661493177, "grad_norm": 24.71409797668457, "learning_rate": 5.682365533850682e-07, "loss": 0.5449, "step": 3227 }, { "epoch": 0.4318972437784319, "grad_norm": 18.99776840209961, "learning_rate": 5.681027562215681e-07, "loss": 0.4122, "step": 3228 }, { "epoch": 0.432031040941932, "grad_norm": 15.439486503601074, "learning_rate": 5.67968959058068e-07, "loss": 0.2434, "step": 3229 }, { "epoch": 0.43216483810543216, "grad_norm": 13.885345458984375, "learning_rate": 5.678351618945679e-07, "loss": 0.6462, "step": 3230 }, { "epoch": 0.4322986352689323, "grad_norm": 18.516952514648438, "learning_rate": 5.677013647310676e-07, "loss": 0.3891, "step": 3231 }, { "epoch": 0.43243243243243246, "grad_norm": 15.22599983215332, "learning_rate": 5.675675675675675e-07, "loss": 0.4115, "step": 3232 }, { "epoch": 0.43256622959593255, "grad_norm": 15.095317840576172, "learning_rate": 5.674337704040674e-07, "loss": 0.4502, "step": 3233 }, { "epoch": 0.4327000267594327, "grad_norm": 18.96845817565918, "learning_rate": 5.672999732405673e-07, "loss": 0.3064, "step": 3234 }, { "epoch": 0.43283382392293285, "grad_norm": 19.80219841003418, "learning_rate": 5.671661760770671e-07, "loss": 0.4788, "step": 3235 }, { "epoch": 0.43296762108643294, "grad_norm": 22.863887786865234, "learning_rate": 5.67032378913567e-07, "loss": 0.4406, "step": 3236 }, { "epoch": 0.4331014182499331, "grad_norm": 25.705089569091797, "learning_rate": 5.668985817500669e-07, "loss": 0.542, "step": 3237 }, { "epoch": 0.43323521541343324, "grad_norm": 29.511743545532227, "learning_rate": 5.667647845865668e-07, "loss": 0.4891, "step": 3238 }, { "epoch": 0.4333690125769334, "grad_norm": 15.63769817352295, "learning_rate": 5.666309874230666e-07, "loss": 0.4309, "step": 3239 }, { "epoch": 0.4335028097404335, "grad_norm": 25.188810348510742, "learning_rate": 5.664971902595664e-07, "loss": 0.4741, "step": 3240 }, { "epoch": 0.43363660690393363, "grad_norm": 20.477542877197266, "learning_rate": 5.663633930960663e-07, "loss": 0.4225, "step": 3241 }, { "epoch": 0.4337704040674338, "grad_norm": 16.794652938842773, "learning_rate": 5.662295959325663e-07, "loss": 0.5015, "step": 3242 }, { "epoch": 0.43390420123093393, "grad_norm": 12.003473281860352, "learning_rate": 5.660957987690661e-07, "loss": 0.4022, "step": 3243 }, { "epoch": 0.434037998394434, "grad_norm": 11.92937183380127, "learning_rate": 5.65962001605566e-07, "loss": 0.3543, "step": 3244 }, { "epoch": 0.43417179555793417, "grad_norm": 13.9984130859375, "learning_rate": 5.658282044420658e-07, "loss": 0.3685, "step": 3245 }, { "epoch": 0.4343055927214343, "grad_norm": 12.73576545715332, "learning_rate": 5.656944072785656e-07, "loss": 0.3381, "step": 3246 }, { "epoch": 0.4344393898849344, "grad_norm": 11.96581745147705, "learning_rate": 5.655606101150655e-07, "loss": 0.457, "step": 3247 }, { "epoch": 0.43457318704843456, "grad_norm": 24.9058780670166, "learning_rate": 5.654268129515654e-07, "loss": 0.5998, "step": 3248 }, { "epoch": 0.4347069842119347, "grad_norm": 15.591460227966309, "learning_rate": 5.652930157880654e-07, "loss": 0.2922, "step": 3249 }, { "epoch": 0.43484078137543486, "grad_norm": 24.2352237701416, "learning_rate": 5.651592186245651e-07, "loss": 0.397, "step": 3250 }, { "epoch": 0.43497457853893495, "grad_norm": 16.609100341796875, "learning_rate": 5.65025421461065e-07, "loss": 0.3946, "step": 3251 }, { "epoch": 0.4351083757024351, "grad_norm": 16.839563369750977, "learning_rate": 5.648916242975649e-07, "loss": 0.2687, "step": 3252 }, { "epoch": 0.43524217286593525, "grad_norm": 22.555925369262695, "learning_rate": 5.647578271340648e-07, "loss": 0.3775, "step": 3253 }, { "epoch": 0.4353759700294354, "grad_norm": 34.8900032043457, "learning_rate": 5.646240299705645e-07, "loss": 0.6585, "step": 3254 }, { "epoch": 0.4355097671929355, "grad_norm": 28.736726760864258, "learning_rate": 5.644902328070644e-07, "loss": 0.5871, "step": 3255 }, { "epoch": 0.43564356435643564, "grad_norm": 10.904458045959473, "learning_rate": 5.643564356435643e-07, "loss": 0.3167, "step": 3256 }, { "epoch": 0.4357773615199358, "grad_norm": 19.210920333862305, "learning_rate": 5.642226384800643e-07, "loss": 0.6118, "step": 3257 }, { "epoch": 0.4359111586834359, "grad_norm": 19.07093620300293, "learning_rate": 5.640888413165641e-07, "loss": 0.5057, "step": 3258 }, { "epoch": 0.43604495584693603, "grad_norm": 19.50852394104004, "learning_rate": 5.639550441530639e-07, "loss": 0.4358, "step": 3259 }, { "epoch": 0.4361787530104362, "grad_norm": 16.44367218017578, "learning_rate": 5.638212469895638e-07, "loss": 0.4164, "step": 3260 }, { "epoch": 0.43631255017393633, "grad_norm": 22.59705352783203, "learning_rate": 5.636874498260637e-07, "loss": 0.4341, "step": 3261 }, { "epoch": 0.4364463473374364, "grad_norm": 26.45863914489746, "learning_rate": 5.635536526625635e-07, "loss": 0.3406, "step": 3262 }, { "epoch": 0.4365801445009366, "grad_norm": 24.19767951965332, "learning_rate": 5.634198554990633e-07, "loss": 0.4424, "step": 3263 }, { "epoch": 0.4367139416644367, "grad_norm": 33.01032257080078, "learning_rate": 5.632860583355633e-07, "loss": 0.5974, "step": 3264 }, { "epoch": 0.43684773882793687, "grad_norm": 10.345341682434082, "learning_rate": 5.631522611720632e-07, "loss": 0.3617, "step": 3265 }, { "epoch": 0.43698153599143696, "grad_norm": 24.884357452392578, "learning_rate": 5.63018464008563e-07, "loss": 0.455, "step": 3266 }, { "epoch": 0.4371153331549371, "grad_norm": 20.653783798217773, "learning_rate": 5.628846668450629e-07, "loss": 0.5604, "step": 3267 }, { "epoch": 0.43724913031843726, "grad_norm": 15.797561645507812, "learning_rate": 5.627508696815627e-07, "loss": 0.1871, "step": 3268 }, { "epoch": 0.43738292748193736, "grad_norm": 31.710973739624023, "learning_rate": 5.626170725180625e-07, "loss": 0.5195, "step": 3269 }, { "epoch": 0.4375167246454375, "grad_norm": 18.276634216308594, "learning_rate": 5.624832753545624e-07, "loss": 0.5139, "step": 3270 }, { "epoch": 0.43765052180893765, "grad_norm": 15.487857818603516, "learning_rate": 5.623494781910624e-07, "loss": 0.4027, "step": 3271 }, { "epoch": 0.4377843189724378, "grad_norm": 15.676012992858887, "learning_rate": 5.622156810275623e-07, "loss": 0.5323, "step": 3272 }, { "epoch": 0.4379181161359379, "grad_norm": 12.91335678100586, "learning_rate": 5.62081883864062e-07, "loss": 0.5127, "step": 3273 }, { "epoch": 0.43805191329943804, "grad_norm": 15.901368141174316, "learning_rate": 5.619480867005619e-07, "loss": 0.4, "step": 3274 }, { "epoch": 0.4381857104629382, "grad_norm": 23.081735610961914, "learning_rate": 5.618142895370618e-07, "loss": 0.4267, "step": 3275 }, { "epoch": 0.43831950762643834, "grad_norm": 19.50996971130371, "learning_rate": 5.616804923735617e-07, "loss": 0.6294, "step": 3276 }, { "epoch": 0.43845330478993844, "grad_norm": 35.83197784423828, "learning_rate": 5.615466952100614e-07, "loss": 0.5883, "step": 3277 }, { "epoch": 0.4385871019534386, "grad_norm": 29.69048500061035, "learning_rate": 5.614128980465613e-07, "loss": 0.3903, "step": 3278 }, { "epoch": 0.43872089911693873, "grad_norm": 33.14921569824219, "learning_rate": 5.612791008830613e-07, "loss": 0.4329, "step": 3279 }, { "epoch": 0.4388546962804388, "grad_norm": 34.90449905395508, "learning_rate": 5.611453037195612e-07, "loss": 0.6072, "step": 3280 }, { "epoch": 0.438988493443939, "grad_norm": 21.423789978027344, "learning_rate": 5.61011506556061e-07, "loss": 0.5255, "step": 3281 }, { "epoch": 0.4391222906074391, "grad_norm": 18.973613739013672, "learning_rate": 5.608777093925608e-07, "loss": 0.5106, "step": 3282 }, { "epoch": 0.4392560877709393, "grad_norm": 21.511821746826172, "learning_rate": 5.607439122290607e-07, "loss": 0.505, "step": 3283 }, { "epoch": 0.43938988493443937, "grad_norm": 23.62993049621582, "learning_rate": 5.606101150655606e-07, "loss": 0.4309, "step": 3284 }, { "epoch": 0.4395236820979395, "grad_norm": 28.526330947875977, "learning_rate": 5.604763179020604e-07, "loss": 0.6145, "step": 3285 }, { "epoch": 0.43965747926143967, "grad_norm": 29.74192237854004, "learning_rate": 5.603425207385604e-07, "loss": 0.3786, "step": 3286 }, { "epoch": 0.4397912764249398, "grad_norm": 21.539419174194336, "learning_rate": 5.602087235750602e-07, "loss": 0.598, "step": 3287 }, { "epoch": 0.4399250735884399, "grad_norm": 16.000818252563477, "learning_rate": 5.600749264115601e-07, "loss": 0.4489, "step": 3288 }, { "epoch": 0.44005887075194006, "grad_norm": 18.969331741333008, "learning_rate": 5.599411292480599e-07, "loss": 0.4719, "step": 3289 }, { "epoch": 0.4401926679154402, "grad_norm": 22.053327560424805, "learning_rate": 5.598073320845598e-07, "loss": 0.4869, "step": 3290 }, { "epoch": 0.4403264650789403, "grad_norm": 38.48855209350586, "learning_rate": 5.596735349210596e-07, "loss": 0.598, "step": 3291 }, { "epoch": 0.44046026224244045, "grad_norm": 39.593833923339844, "learning_rate": 5.595397377575594e-07, "loss": 0.4198, "step": 3292 }, { "epoch": 0.4405940594059406, "grad_norm": 36.02135467529297, "learning_rate": 5.594059405940594e-07, "loss": 0.4056, "step": 3293 }, { "epoch": 0.44072785656944075, "grad_norm": 24.04949188232422, "learning_rate": 5.592721434305593e-07, "loss": 0.3798, "step": 3294 }, { "epoch": 0.44086165373294084, "grad_norm": 23.797544479370117, "learning_rate": 5.591383462670592e-07, "loss": 0.4612, "step": 3295 }, { "epoch": 0.440995450896441, "grad_norm": 36.85647201538086, "learning_rate": 5.590045491035589e-07, "loss": 0.3953, "step": 3296 }, { "epoch": 0.44112924805994114, "grad_norm": 25.22797966003418, "learning_rate": 5.588707519400588e-07, "loss": 0.4291, "step": 3297 }, { "epoch": 0.4412630452234413, "grad_norm": 29.171527862548828, "learning_rate": 5.587369547765587e-07, "loss": 0.479, "step": 3298 }, { "epoch": 0.4413968423869414, "grad_norm": 17.1924991607666, "learning_rate": 5.586031576130586e-07, "loss": 0.2343, "step": 3299 }, { "epoch": 0.44153063955044153, "grad_norm": 31.722259521484375, "learning_rate": 5.584693604495583e-07, "loss": 0.4503, "step": 3300 }, { "epoch": 0.4416644367139417, "grad_norm": 16.098037719726562, "learning_rate": 5.583355632860583e-07, "loss": 0.4709, "step": 3301 }, { "epoch": 0.44179823387744177, "grad_norm": 22.240570068359375, "learning_rate": 5.582017661225582e-07, "loss": 0.3746, "step": 3302 }, { "epoch": 0.4419320310409419, "grad_norm": 27.506378173828125, "learning_rate": 5.580679689590581e-07, "loss": 0.2414, "step": 3303 }, { "epoch": 0.44206582820444207, "grad_norm": 17.56031036376953, "learning_rate": 5.579341717955579e-07, "loss": 0.4238, "step": 3304 }, { "epoch": 0.4421996253679422, "grad_norm": 11.493874549865723, "learning_rate": 5.578003746320577e-07, "loss": 0.2396, "step": 3305 }, { "epoch": 0.4423334225314423, "grad_norm": 11.707161903381348, "learning_rate": 5.576665774685576e-07, "loss": 0.2974, "step": 3306 }, { "epoch": 0.44246721969494246, "grad_norm": 23.31096076965332, "learning_rate": 5.575327803050575e-07, "loss": 0.3552, "step": 3307 }, { "epoch": 0.4426010168584426, "grad_norm": 19.973817825317383, "learning_rate": 5.573989831415574e-07, "loss": 0.4028, "step": 3308 }, { "epoch": 0.44273481402194276, "grad_norm": 30.16774559020996, "learning_rate": 5.572651859780573e-07, "loss": 0.6159, "step": 3309 }, { "epoch": 0.44286861118544285, "grad_norm": 22.674789428710938, "learning_rate": 5.571313888145571e-07, "loss": 0.3694, "step": 3310 }, { "epoch": 0.443002408348943, "grad_norm": 39.7274055480957, "learning_rate": 5.56997591651057e-07, "loss": 0.7045, "step": 3311 }, { "epoch": 0.44313620551244315, "grad_norm": 20.63019371032715, "learning_rate": 5.568637944875568e-07, "loss": 0.4114, "step": 3312 }, { "epoch": 0.4432700026759433, "grad_norm": 37.11985778808594, "learning_rate": 5.567299973240567e-07, "loss": 0.4647, "step": 3313 }, { "epoch": 0.4434037998394434, "grad_norm": 15.4974946975708, "learning_rate": 5.565962001605566e-07, "loss": 0.332, "step": 3314 }, { "epoch": 0.44353759700294354, "grad_norm": 35.87019348144531, "learning_rate": 5.564624029970565e-07, "loss": 0.7049, "step": 3315 }, { "epoch": 0.4436713941664437, "grad_norm": 16.01710319519043, "learning_rate": 5.563286058335563e-07, "loss": 0.4105, "step": 3316 }, { "epoch": 0.4438051913299438, "grad_norm": 31.75879669189453, "learning_rate": 5.561948086700562e-07, "loss": 0.4187, "step": 3317 }, { "epoch": 0.44393898849344393, "grad_norm": 28.78887367248535, "learning_rate": 5.560610115065561e-07, "loss": 0.4597, "step": 3318 }, { "epoch": 0.4440727856569441, "grad_norm": 17.627466201782227, "learning_rate": 5.559272143430558e-07, "loss": 0.4318, "step": 3319 }, { "epoch": 0.44420658282044423, "grad_norm": 23.552350997924805, "learning_rate": 5.557934171795557e-07, "loss": 0.4724, "step": 3320 }, { "epoch": 0.4443403799839443, "grad_norm": 17.21389389038086, "learning_rate": 5.556596200160556e-07, "loss": 0.4999, "step": 3321 }, { "epoch": 0.44447417714744447, "grad_norm": 26.350736618041992, "learning_rate": 5.555258228525555e-07, "loss": 0.4317, "step": 3322 }, { "epoch": 0.4446079743109446, "grad_norm": 23.218637466430664, "learning_rate": 5.553920256890554e-07, "loss": 0.3225, "step": 3323 }, { "epoch": 0.44474177147444477, "grad_norm": 17.133541107177734, "learning_rate": 5.552582285255552e-07, "loss": 0.5113, "step": 3324 }, { "epoch": 0.44487556863794486, "grad_norm": 21.73007583618164, "learning_rate": 5.551244313620551e-07, "loss": 0.4702, "step": 3325 }, { "epoch": 0.445009365801445, "grad_norm": 41.40461349487305, "learning_rate": 5.54990634198555e-07, "loss": 0.3048, "step": 3326 }, { "epoch": 0.44514316296494516, "grad_norm": 24.143890380859375, "learning_rate": 5.548568370350548e-07, "loss": 0.2524, "step": 3327 }, { "epoch": 0.44527696012844525, "grad_norm": 26.549562454223633, "learning_rate": 5.547230398715546e-07, "loss": 0.5889, "step": 3328 }, { "epoch": 0.4454107572919454, "grad_norm": 28.205123901367188, "learning_rate": 5.545892427080545e-07, "loss": 0.4919, "step": 3329 }, { "epoch": 0.44554455445544555, "grad_norm": 24.55028533935547, "learning_rate": 5.544554455445545e-07, "loss": 0.2419, "step": 3330 }, { "epoch": 0.4456783516189457, "grad_norm": 15.979634284973145, "learning_rate": 5.543216483810543e-07, "loss": 0.4626, "step": 3331 }, { "epoch": 0.4458121487824458, "grad_norm": 27.80440330505371, "learning_rate": 5.541878512175542e-07, "loss": 0.2511, "step": 3332 }, { "epoch": 0.44594594594594594, "grad_norm": 23.906885147094727, "learning_rate": 5.54054054054054e-07, "loss": 0.5697, "step": 3333 }, { "epoch": 0.4460797431094461, "grad_norm": 33.03300857543945, "learning_rate": 5.539202568905539e-07, "loss": 0.3595, "step": 3334 }, { "epoch": 0.44621354027294624, "grad_norm": 21.503448486328125, "learning_rate": 5.537864597270537e-07, "loss": 0.3044, "step": 3335 }, { "epoch": 0.44634733743644633, "grad_norm": 20.14994239807129, "learning_rate": 5.536526625635536e-07, "loss": 0.6804, "step": 3336 }, { "epoch": 0.4464811345999465, "grad_norm": 17.337085723876953, "learning_rate": 5.535188654000536e-07, "loss": 0.3802, "step": 3337 }, { "epoch": 0.44661493176344663, "grad_norm": 19.945173263549805, "learning_rate": 5.533850682365534e-07, "loss": 0.463, "step": 3338 }, { "epoch": 0.4467487289269467, "grad_norm": 13.731951713562012, "learning_rate": 5.532512710730532e-07, "loss": 0.2477, "step": 3339 }, { "epoch": 0.4468825260904469, "grad_norm": 37.71767807006836, "learning_rate": 5.531174739095531e-07, "loss": 0.5524, "step": 3340 }, { "epoch": 0.447016323253947, "grad_norm": 50.942073822021484, "learning_rate": 5.52983676746053e-07, "loss": 0.8027, "step": 3341 }, { "epoch": 0.44715012041744717, "grad_norm": 53.160736083984375, "learning_rate": 5.528498795825529e-07, "loss": 0.5993, "step": 3342 }, { "epoch": 0.44728391758094727, "grad_norm": 58.217281341552734, "learning_rate": 5.527160824190526e-07, "loss": 0.8415, "step": 3343 }, { "epoch": 0.4474177147444474, "grad_norm": 26.66815185546875, "learning_rate": 5.525822852555525e-07, "loss": 0.4984, "step": 3344 }, { "epoch": 0.44755151190794756, "grad_norm": 28.0050048828125, "learning_rate": 5.524484880920525e-07, "loss": 0.5705, "step": 3345 }, { "epoch": 0.4476853090714477, "grad_norm": 26.91765785217285, "learning_rate": 5.523146909285524e-07, "loss": 0.407, "step": 3346 }, { "epoch": 0.4478191062349478, "grad_norm": 16.770854949951172, "learning_rate": 5.521808937650521e-07, "loss": 0.4862, "step": 3347 }, { "epoch": 0.44795290339844795, "grad_norm": 12.198723793029785, "learning_rate": 5.52047096601552e-07, "loss": 0.2097, "step": 3348 }, { "epoch": 0.4480867005619481, "grad_norm": 21.305377960205078, "learning_rate": 5.519132994380519e-07, "loss": 0.3499, "step": 3349 }, { "epoch": 0.4482204977254482, "grad_norm": 24.0690860748291, "learning_rate": 5.517795022745517e-07, "loss": 0.7256, "step": 3350 }, { "epoch": 0.44835429488894835, "grad_norm": 35.7537727355957, "learning_rate": 5.516457051110516e-07, "loss": 0.5231, "step": 3351 }, { "epoch": 0.4484880920524485, "grad_norm": 14.026640892028809, "learning_rate": 5.515119079475515e-07, "loss": 0.4786, "step": 3352 }, { "epoch": 0.44862188921594864, "grad_norm": 12.224194526672363, "learning_rate": 5.513781107840514e-07, "loss": 0.2701, "step": 3353 }, { "epoch": 0.44875568637944874, "grad_norm": 11.745952606201172, "learning_rate": 5.512443136205512e-07, "loss": 0.4216, "step": 3354 }, { "epoch": 0.4488894835429489, "grad_norm": 24.504745483398438, "learning_rate": 5.511105164570511e-07, "loss": 0.4953, "step": 3355 }, { "epoch": 0.44902328070644904, "grad_norm": 15.573790550231934, "learning_rate": 5.50976719293551e-07, "loss": 0.334, "step": 3356 }, { "epoch": 0.4491570778699492, "grad_norm": 23.77032470703125, "learning_rate": 5.508429221300508e-07, "loss": 0.6092, "step": 3357 }, { "epoch": 0.4492908750334493, "grad_norm": 21.818580627441406, "learning_rate": 5.507091249665506e-07, "loss": 0.3991, "step": 3358 }, { "epoch": 0.4494246721969494, "grad_norm": 14.610416412353516, "learning_rate": 5.505753278030506e-07, "loss": 0.3933, "step": 3359 }, { "epoch": 0.4495584693604496, "grad_norm": 16.907289505004883, "learning_rate": 5.504415306395505e-07, "loss": 0.5673, "step": 3360 }, { "epoch": 0.44969226652394967, "grad_norm": 33.32551193237305, "learning_rate": 5.503077334760503e-07, "loss": 0.6071, "step": 3361 }, { "epoch": 0.4498260636874498, "grad_norm": 14.750319480895996, "learning_rate": 5.501739363125501e-07, "loss": 0.4308, "step": 3362 }, { "epoch": 0.44995986085094997, "grad_norm": 33.76567840576172, "learning_rate": 5.5004013914905e-07, "loss": 0.3894, "step": 3363 }, { "epoch": 0.4500936580144501, "grad_norm": 22.20186424255371, "learning_rate": 5.499063419855499e-07, "loss": 0.4172, "step": 3364 }, { "epoch": 0.4502274551779502, "grad_norm": 18.995637893676758, "learning_rate": 5.497725448220498e-07, "loss": 0.3736, "step": 3365 }, { "epoch": 0.45036125234145036, "grad_norm": 18.08953285217285, "learning_rate": 5.496387476585495e-07, "loss": 0.3917, "step": 3366 }, { "epoch": 0.4504950495049505, "grad_norm": 25.63201904296875, "learning_rate": 5.495049504950495e-07, "loss": 0.4689, "step": 3367 }, { "epoch": 0.45062884666845066, "grad_norm": 16.39924430847168, "learning_rate": 5.493711533315494e-07, "loss": 0.4914, "step": 3368 }, { "epoch": 0.45076264383195075, "grad_norm": 15.143438339233398, "learning_rate": 5.492373561680493e-07, "loss": 0.3936, "step": 3369 }, { "epoch": 0.4508964409954509, "grad_norm": 23.473886489868164, "learning_rate": 5.49103559004549e-07, "loss": 0.5643, "step": 3370 }, { "epoch": 0.45103023815895105, "grad_norm": 20.238813400268555, "learning_rate": 5.489697618410489e-07, "loss": 0.3122, "step": 3371 }, { "epoch": 0.45116403532245114, "grad_norm": 19.013269424438477, "learning_rate": 5.488359646775488e-07, "loss": 0.4151, "step": 3372 }, { "epoch": 0.4512978324859513, "grad_norm": 15.140726089477539, "learning_rate": 5.487021675140486e-07, "loss": 0.483, "step": 3373 }, { "epoch": 0.45143162964945144, "grad_norm": 25.456811904907227, "learning_rate": 5.485683703505486e-07, "loss": 0.3857, "step": 3374 }, { "epoch": 0.4515654268129516, "grad_norm": 26.357929229736328, "learning_rate": 5.484345731870484e-07, "loss": 0.4993, "step": 3375 }, { "epoch": 0.4516992239764517, "grad_norm": 21.570096969604492, "learning_rate": 5.483007760235483e-07, "loss": 0.3465, "step": 3376 }, { "epoch": 0.45183302113995183, "grad_norm": 23.64383888244629, "learning_rate": 5.481669788600481e-07, "loss": 0.5143, "step": 3377 }, { "epoch": 0.451966818303452, "grad_norm": 27.447477340698242, "learning_rate": 5.48033181696548e-07, "loss": 0.5661, "step": 3378 }, { "epoch": 0.4521006154669521, "grad_norm": 25.35541534423828, "learning_rate": 5.478993845330479e-07, "loss": 0.5483, "step": 3379 }, { "epoch": 0.4522344126304522, "grad_norm": 18.8485050201416, "learning_rate": 5.477655873695477e-07, "loss": 0.3506, "step": 3380 }, { "epoch": 0.45236820979395237, "grad_norm": 13.0349702835083, "learning_rate": 5.476317902060476e-07, "loss": 0.4848, "step": 3381 }, { "epoch": 0.4525020069574525, "grad_norm": 21.077207565307617, "learning_rate": 5.474979930425475e-07, "loss": 0.3377, "step": 3382 }, { "epoch": 0.4526358041209526, "grad_norm": 42.81723403930664, "learning_rate": 5.473641958790474e-07, "loss": 0.4877, "step": 3383 }, { "epoch": 0.45276960128445276, "grad_norm": 17.28203010559082, "learning_rate": 5.472303987155473e-07, "loss": 0.4581, "step": 3384 }, { "epoch": 0.4529033984479529, "grad_norm": 23.970680236816406, "learning_rate": 5.47096601552047e-07, "loss": 0.6899, "step": 3385 }, { "epoch": 0.45303719561145306, "grad_norm": 19.838937759399414, "learning_rate": 5.469628043885469e-07, "loss": 0.3358, "step": 3386 }, { "epoch": 0.45317099277495315, "grad_norm": 19.96144676208496, "learning_rate": 5.468290072250468e-07, "loss": 0.5053, "step": 3387 }, { "epoch": 0.4533047899384533, "grad_norm": 21.1656494140625, "learning_rate": 5.466952100615467e-07, "loss": 0.6394, "step": 3388 }, { "epoch": 0.45343858710195345, "grad_norm": 38.08667755126953, "learning_rate": 5.465614128980465e-07, "loss": 0.459, "step": 3389 }, { "epoch": 0.4535723842654536, "grad_norm": 41.25019073486328, "learning_rate": 5.464276157345464e-07, "loss": 0.4332, "step": 3390 }, { "epoch": 0.4537061814289537, "grad_norm": 17.084972381591797, "learning_rate": 5.462938185710463e-07, "loss": 0.4107, "step": 3391 }, { "epoch": 0.45383997859245384, "grad_norm": 24.290584564208984, "learning_rate": 5.461600214075462e-07, "loss": 0.3947, "step": 3392 }, { "epoch": 0.453973775755954, "grad_norm": 30.471511840820312, "learning_rate": 5.46026224244046e-07, "loss": 0.3525, "step": 3393 }, { "epoch": 0.4541075729194541, "grad_norm": 36.77585220336914, "learning_rate": 5.458924270805458e-07, "loss": 0.3352, "step": 3394 }, { "epoch": 0.45424137008295423, "grad_norm": 13.201654434204102, "learning_rate": 5.457586299170457e-07, "loss": 0.4999, "step": 3395 }, { "epoch": 0.4543751672464544, "grad_norm": 40.4196662902832, "learning_rate": 5.456248327535457e-07, "loss": 0.563, "step": 3396 }, { "epoch": 0.45450896440995453, "grad_norm": 20.390731811523438, "learning_rate": 5.454910355900455e-07, "loss": 0.558, "step": 3397 }, { "epoch": 0.4546427615734546, "grad_norm": 41.92556381225586, "learning_rate": 5.453572384265454e-07, "loss": 0.5797, "step": 3398 }, { "epoch": 0.4547765587369548, "grad_norm": 9.246241569519043, "learning_rate": 5.452234412630452e-07, "loss": 0.4411, "step": 3399 }, { "epoch": 0.4549103559004549, "grad_norm": 24.09770965576172, "learning_rate": 5.45089644099545e-07, "loss": 0.4798, "step": 3400 }, { "epoch": 0.45504415306395507, "grad_norm": 16.9919376373291, "learning_rate": 5.449558469360449e-07, "loss": 0.4007, "step": 3401 }, { "epoch": 0.45517795022745516, "grad_norm": 13.648521423339844, "learning_rate": 5.448220497725448e-07, "loss": 0.4203, "step": 3402 }, { "epoch": 0.4553117473909553, "grad_norm": 28.01886558532715, "learning_rate": 5.446882526090447e-07, "loss": 0.5117, "step": 3403 }, { "epoch": 0.45544554455445546, "grad_norm": 26.750141143798828, "learning_rate": 5.445544554455445e-07, "loss": 0.8033, "step": 3404 }, { "epoch": 0.45557934171795555, "grad_norm": 40.26789855957031, "learning_rate": 5.444206582820444e-07, "loss": 0.3679, "step": 3405 }, { "epoch": 0.4557131388814557, "grad_norm": 23.07971954345703, "learning_rate": 5.442868611185443e-07, "loss": 0.3824, "step": 3406 }, { "epoch": 0.45584693604495585, "grad_norm": 15.413232803344727, "learning_rate": 5.441530639550442e-07, "loss": 0.3898, "step": 3407 }, { "epoch": 0.455980733208456, "grad_norm": 19.6287841796875, "learning_rate": 5.440192667915439e-07, "loss": 0.4461, "step": 3408 }, { "epoch": 0.4561145303719561, "grad_norm": 30.76749610900879, "learning_rate": 5.438854696280438e-07, "loss": 0.3006, "step": 3409 }, { "epoch": 0.45624832753545624, "grad_norm": 19.520469665527344, "learning_rate": 5.437516724645437e-07, "loss": 0.4253, "step": 3410 }, { "epoch": 0.4563821246989564, "grad_norm": 20.957958221435547, "learning_rate": 5.436178753010437e-07, "loss": 0.3259, "step": 3411 }, { "epoch": 0.45651592186245654, "grad_norm": 12.027129173278809, "learning_rate": 5.434840781375434e-07, "loss": 0.4121, "step": 3412 }, { "epoch": 0.45664971902595664, "grad_norm": 20.580711364746094, "learning_rate": 5.433502809740433e-07, "loss": 0.3314, "step": 3413 }, { "epoch": 0.4567835161894568, "grad_norm": 16.558555603027344, "learning_rate": 5.432164838105432e-07, "loss": 0.344, "step": 3414 }, { "epoch": 0.45691731335295693, "grad_norm": 18.441701889038086, "learning_rate": 5.430826866470431e-07, "loss": 0.6797, "step": 3415 }, { "epoch": 0.457051110516457, "grad_norm": 14.81726360321045, "learning_rate": 5.429488894835429e-07, "loss": 0.3266, "step": 3416 }, { "epoch": 0.4571849076799572, "grad_norm": 25.36564826965332, "learning_rate": 5.428150923200427e-07, "loss": 0.4924, "step": 3417 }, { "epoch": 0.4573187048434573, "grad_norm": 16.820722579956055, "learning_rate": 5.426812951565427e-07, "loss": 0.4287, "step": 3418 }, { "epoch": 0.4574525020069575, "grad_norm": 14.987529754638672, "learning_rate": 5.425474979930426e-07, "loss": 0.4199, "step": 3419 }, { "epoch": 0.45758629917045757, "grad_norm": 26.632080078125, "learning_rate": 5.424137008295424e-07, "loss": 0.5177, "step": 3420 }, { "epoch": 0.4577200963339577, "grad_norm": 16.233306884765625, "learning_rate": 5.422799036660423e-07, "loss": 0.4025, "step": 3421 }, { "epoch": 0.45785389349745786, "grad_norm": 15.045428276062012, "learning_rate": 5.421461065025421e-07, "loss": 0.4165, "step": 3422 }, { "epoch": 0.457987690660958, "grad_norm": 33.00197982788086, "learning_rate": 5.420123093390419e-07, "loss": 0.288, "step": 3423 }, { "epoch": 0.4581214878244581, "grad_norm": 17.282909393310547, "learning_rate": 5.418785121755418e-07, "loss": 0.5016, "step": 3424 }, { "epoch": 0.45825528498795826, "grad_norm": 9.619023323059082, "learning_rate": 5.417447150120418e-07, "loss": 0.3057, "step": 3425 }, { "epoch": 0.4583890821514584, "grad_norm": 21.326250076293945, "learning_rate": 5.416109178485417e-07, "loss": 0.2938, "step": 3426 }, { "epoch": 0.4585228793149585, "grad_norm": 20.98230743408203, "learning_rate": 5.414771206850414e-07, "loss": 0.2836, "step": 3427 }, { "epoch": 0.45865667647845865, "grad_norm": 18.714059829711914, "learning_rate": 5.413433235215413e-07, "loss": 0.528, "step": 3428 }, { "epoch": 0.4587904736419588, "grad_norm": 16.696619033813477, "learning_rate": 5.412095263580412e-07, "loss": 0.4341, "step": 3429 }, { "epoch": 0.45892427080545894, "grad_norm": 26.16579246520996, "learning_rate": 5.410757291945411e-07, "loss": 0.6544, "step": 3430 }, { "epoch": 0.45905806796895904, "grad_norm": 12.072846412658691, "learning_rate": 5.409419320310408e-07, "loss": 0.3158, "step": 3431 }, { "epoch": 0.4591918651324592, "grad_norm": 23.412681579589844, "learning_rate": 5.408081348675407e-07, "loss": 0.2654, "step": 3432 }, { "epoch": 0.45932566229595934, "grad_norm": 27.555273056030273, "learning_rate": 5.406743377040407e-07, "loss": 0.7663, "step": 3433 }, { "epoch": 0.4594594594594595, "grad_norm": 39.04740905761719, "learning_rate": 5.405405405405406e-07, "loss": 0.5603, "step": 3434 }, { "epoch": 0.4595932566229596, "grad_norm": 15.212327003479004, "learning_rate": 5.404067433770404e-07, "loss": 0.4156, "step": 3435 }, { "epoch": 0.4597270537864597, "grad_norm": 31.8503360748291, "learning_rate": 5.402729462135402e-07, "loss": 0.3392, "step": 3436 }, { "epoch": 0.4598608509499599, "grad_norm": 18.48019027709961, "learning_rate": 5.401391490500401e-07, "loss": 0.457, "step": 3437 }, { "epoch": 0.45999464811345997, "grad_norm": 25.41646957397461, "learning_rate": 5.4000535188654e-07, "loss": 0.1994, "step": 3438 }, { "epoch": 0.4601284452769601, "grad_norm": 34.8560676574707, "learning_rate": 5.398715547230398e-07, "loss": 0.5709, "step": 3439 }, { "epoch": 0.46026224244046027, "grad_norm": 22.920839309692383, "learning_rate": 5.397377575595398e-07, "loss": 0.4876, "step": 3440 }, { "epoch": 0.4603960396039604, "grad_norm": 15.665233612060547, "learning_rate": 5.396039603960396e-07, "loss": 0.448, "step": 3441 }, { "epoch": 0.4605298367674605, "grad_norm": 29.582324981689453, "learning_rate": 5.394701632325395e-07, "loss": 0.5682, "step": 3442 }, { "epoch": 0.46066363393096066, "grad_norm": 54.78908920288086, "learning_rate": 5.393363660690393e-07, "loss": 0.5969, "step": 3443 }, { "epoch": 0.4607974310944608, "grad_norm": 14.468111038208008, "learning_rate": 5.392025689055392e-07, "loss": 0.5577, "step": 3444 }, { "epoch": 0.46093122825796096, "grad_norm": 45.2048225402832, "learning_rate": 5.39068771742039e-07, "loss": 0.5045, "step": 3445 }, { "epoch": 0.46106502542146105, "grad_norm": 26.853595733642578, "learning_rate": 5.389349745785389e-07, "loss": 0.6665, "step": 3446 }, { "epoch": 0.4611988225849612, "grad_norm": 13.414589881896973, "learning_rate": 5.388011774150388e-07, "loss": 0.2661, "step": 3447 }, { "epoch": 0.46133261974846135, "grad_norm": 23.457904815673828, "learning_rate": 5.386673802515387e-07, "loss": 0.4065, "step": 3448 }, { "epoch": 0.46146641691196144, "grad_norm": 19.0379638671875, "learning_rate": 5.385335830880386e-07, "loss": 0.4682, "step": 3449 }, { "epoch": 0.4616002140754616, "grad_norm": 22.02486801147461, "learning_rate": 5.383997859245384e-07, "loss": 0.5673, "step": 3450 }, { "epoch": 0.46173401123896174, "grad_norm": 21.810443878173828, "learning_rate": 5.382659887610382e-07, "loss": 0.443, "step": 3451 }, { "epoch": 0.4618678084024619, "grad_norm": 28.424028396606445, "learning_rate": 5.381321915975381e-07, "loss": 0.5424, "step": 3452 }, { "epoch": 0.462001605565962, "grad_norm": 33.381500244140625, "learning_rate": 5.37998394434038e-07, "loss": 0.5353, "step": 3453 }, { "epoch": 0.46213540272946213, "grad_norm": 17.905900955200195, "learning_rate": 5.378645972705377e-07, "loss": 0.5759, "step": 3454 }, { "epoch": 0.4622691998929623, "grad_norm": 15.402196884155273, "learning_rate": 5.377308001070377e-07, "loss": 0.3573, "step": 3455 }, { "epoch": 0.46240299705646243, "grad_norm": 19.719541549682617, "learning_rate": 5.375970029435376e-07, "loss": 0.3166, "step": 3456 }, { "epoch": 0.4625367942199625, "grad_norm": 29.96443748474121, "learning_rate": 5.374632057800375e-07, "loss": 0.4125, "step": 3457 }, { "epoch": 0.46267059138346267, "grad_norm": 13.157668113708496, "learning_rate": 5.373294086165373e-07, "loss": 0.4152, "step": 3458 }, { "epoch": 0.4628043885469628, "grad_norm": 19.930856704711914, "learning_rate": 5.371956114530371e-07, "loss": 0.5327, "step": 3459 }, { "epoch": 0.4629381857104629, "grad_norm": 38.55824661254883, "learning_rate": 5.37061814289537e-07, "loss": 0.6089, "step": 3460 }, { "epoch": 0.46307198287396306, "grad_norm": 16.57554054260254, "learning_rate": 5.369280171260369e-07, "loss": 0.5834, "step": 3461 }, { "epoch": 0.4632057800374632, "grad_norm": 17.390283584594727, "learning_rate": 5.367942199625368e-07, "loss": 0.4879, "step": 3462 }, { "epoch": 0.46333957720096336, "grad_norm": 21.332347869873047, "learning_rate": 5.366604227990367e-07, "loss": 0.572, "step": 3463 }, { "epoch": 0.46347337436446345, "grad_norm": 23.875289916992188, "learning_rate": 5.365266256355365e-07, "loss": 0.2745, "step": 3464 }, { "epoch": 0.4636071715279636, "grad_norm": 29.832326889038086, "learning_rate": 5.363928284720364e-07, "loss": 0.5141, "step": 3465 }, { "epoch": 0.46374096869146375, "grad_norm": 26.940996170043945, "learning_rate": 5.362590313085362e-07, "loss": 0.533, "step": 3466 }, { "epoch": 0.4638747658549639, "grad_norm": 23.39957046508789, "learning_rate": 5.361252341450361e-07, "loss": 0.5487, "step": 3467 }, { "epoch": 0.464008563018464, "grad_norm": 16.242780685424805, "learning_rate": 5.35991436981536e-07, "loss": 0.4703, "step": 3468 }, { "epoch": 0.46414236018196414, "grad_norm": 13.959771156311035, "learning_rate": 5.358576398180358e-07, "loss": 0.4571, "step": 3469 }, { "epoch": 0.4642761573454643, "grad_norm": 24.292924880981445, "learning_rate": 5.357238426545357e-07, "loss": 0.4853, "step": 3470 }, { "epoch": 0.4644099545089644, "grad_norm": 26.22980499267578, "learning_rate": 5.355900454910356e-07, "loss": 0.4327, "step": 3471 }, { "epoch": 0.46454375167246453, "grad_norm": 26.429981231689453, "learning_rate": 5.354562483275355e-07, "loss": 0.4955, "step": 3472 }, { "epoch": 0.4646775488359647, "grad_norm": 12.36738109588623, "learning_rate": 5.353224511640353e-07, "loss": 0.2646, "step": 3473 }, { "epoch": 0.46481134599946483, "grad_norm": 29.598217010498047, "learning_rate": 5.351886540005351e-07, "loss": 0.4625, "step": 3474 }, { "epoch": 0.4649451431629649, "grad_norm": 19.925962448120117, "learning_rate": 5.35054856837035e-07, "loss": 0.4543, "step": 3475 }, { "epoch": 0.4650789403264651, "grad_norm": 24.60995864868164, "learning_rate": 5.349210596735349e-07, "loss": 0.3924, "step": 3476 }, { "epoch": 0.4652127374899652, "grad_norm": 42.162879943847656, "learning_rate": 5.347872625100349e-07, "loss": 0.593, "step": 3477 }, { "epoch": 0.46534653465346537, "grad_norm": 21.198322296142578, "learning_rate": 5.346534653465346e-07, "loss": 0.6185, "step": 3478 }, { "epoch": 0.46548033181696546, "grad_norm": 21.426576614379883, "learning_rate": 5.345196681830345e-07, "loss": 0.2942, "step": 3479 }, { "epoch": 0.4656141289804656, "grad_norm": 24.00480842590332, "learning_rate": 5.343858710195344e-07, "loss": 0.7023, "step": 3480 }, { "epoch": 0.46574792614396576, "grad_norm": 15.458468437194824, "learning_rate": 5.342520738560342e-07, "loss": 0.3341, "step": 3481 }, { "epoch": 0.46588172330746586, "grad_norm": 37.81427764892578, "learning_rate": 5.34118276692534e-07, "loss": 0.4081, "step": 3482 }, { "epoch": 0.466015520470966, "grad_norm": 9.879461288452148, "learning_rate": 5.339844795290339e-07, "loss": 0.2686, "step": 3483 }, { "epoch": 0.46614931763446615, "grad_norm": 24.955490112304688, "learning_rate": 5.338506823655339e-07, "loss": 0.3725, "step": 3484 }, { "epoch": 0.4662831147979663, "grad_norm": 14.437450408935547, "learning_rate": 5.337168852020337e-07, "loss": 0.3981, "step": 3485 }, { "epoch": 0.4664169119614664, "grad_norm": 17.17585563659668, "learning_rate": 5.335830880385336e-07, "loss": 0.359, "step": 3486 }, { "epoch": 0.46655070912496655, "grad_norm": 14.743854522705078, "learning_rate": 5.334492908750334e-07, "loss": 0.313, "step": 3487 }, { "epoch": 0.4666845062884667, "grad_norm": 11.8461332321167, "learning_rate": 5.333154937115333e-07, "loss": 0.2638, "step": 3488 }, { "epoch": 0.46681830345196684, "grad_norm": 28.260330200195312, "learning_rate": 5.331816965480331e-07, "loss": 0.4943, "step": 3489 }, { "epoch": 0.46695210061546694, "grad_norm": 12.612913131713867, "learning_rate": 5.33047899384533e-07, "loss": 0.2162, "step": 3490 }, { "epoch": 0.4670858977789671, "grad_norm": 21.934816360473633, "learning_rate": 5.329141022210329e-07, "loss": 0.2444, "step": 3491 }, { "epoch": 0.46721969494246723, "grad_norm": 30.181596755981445, "learning_rate": 5.327803050575328e-07, "loss": 0.4624, "step": 3492 }, { "epoch": 0.4673534921059673, "grad_norm": 23.945070266723633, "learning_rate": 5.326465078940326e-07, "loss": 0.6255, "step": 3493 }, { "epoch": 0.4674872892694675, "grad_norm": 8.715603828430176, "learning_rate": 5.325127107305325e-07, "loss": 0.2014, "step": 3494 }, { "epoch": 0.4676210864329676, "grad_norm": 23.376689910888672, "learning_rate": 5.323789135670324e-07, "loss": 0.4334, "step": 3495 }, { "epoch": 0.4677548835964678, "grad_norm": 45.973358154296875, "learning_rate": 5.322451164035322e-07, "loss": 0.6141, "step": 3496 }, { "epoch": 0.46788868075996787, "grad_norm": 16.993820190429688, "learning_rate": 5.32111319240032e-07, "loss": 0.3769, "step": 3497 }, { "epoch": 0.468022477923468, "grad_norm": 50.795528411865234, "learning_rate": 5.319775220765319e-07, "loss": 0.6306, "step": 3498 }, { "epoch": 0.46815627508696817, "grad_norm": 39.785518646240234, "learning_rate": 5.318437249130319e-07, "loss": 0.5513, "step": 3499 }, { "epoch": 0.4682900722504683, "grad_norm": 19.507001876831055, "learning_rate": 5.317099277495318e-07, "loss": 0.2613, "step": 3500 }, { "epoch": 0.4684238694139684, "grad_norm": 22.533369064331055, "learning_rate": 5.315761305860315e-07, "loss": 0.5276, "step": 3501 }, { "epoch": 0.46855766657746856, "grad_norm": 27.831201553344727, "learning_rate": 5.314423334225314e-07, "loss": 0.4173, "step": 3502 }, { "epoch": 0.4686914637409687, "grad_norm": 18.297353744506836, "learning_rate": 5.313085362590313e-07, "loss": 0.4247, "step": 3503 }, { "epoch": 0.4688252609044688, "grad_norm": 13.453357696533203, "learning_rate": 5.311747390955311e-07, "loss": 0.2504, "step": 3504 }, { "epoch": 0.46895905806796895, "grad_norm": 29.001558303833008, "learning_rate": 5.31040941932031e-07, "loss": 0.5705, "step": 3505 }, { "epoch": 0.4690928552314691, "grad_norm": 25.170665740966797, "learning_rate": 5.309071447685309e-07, "loss": 0.5349, "step": 3506 }, { "epoch": 0.46922665239496925, "grad_norm": 36.53276824951172, "learning_rate": 5.307733476050308e-07, "loss": 0.4685, "step": 3507 }, { "epoch": 0.46936044955846934, "grad_norm": 23.468307495117188, "learning_rate": 5.306395504415306e-07, "loss": 0.3793, "step": 3508 }, { "epoch": 0.4694942467219695, "grad_norm": 20.088659286499023, "learning_rate": 5.305057532780305e-07, "loss": 0.3622, "step": 3509 }, { "epoch": 0.46962804388546964, "grad_norm": 22.91321563720703, "learning_rate": 5.303719561145303e-07, "loss": 0.19, "step": 3510 }, { "epoch": 0.4697618410489698, "grad_norm": 27.5570068359375, "learning_rate": 5.302381589510302e-07, "loss": 0.3641, "step": 3511 }, { "epoch": 0.4698956382124699, "grad_norm": 19.160491943359375, "learning_rate": 5.3010436178753e-07, "loss": 0.3326, "step": 3512 }, { "epoch": 0.47002943537597003, "grad_norm": 25.763282775878906, "learning_rate": 5.299705646240299e-07, "loss": 0.5233, "step": 3513 }, { "epoch": 0.4701632325394702, "grad_norm": 48.126094818115234, "learning_rate": 5.298367674605299e-07, "loss": 0.4199, "step": 3514 }, { "epoch": 0.47029702970297027, "grad_norm": 11.863720893859863, "learning_rate": 5.297029702970297e-07, "loss": 0.2563, "step": 3515 }, { "epoch": 0.4704308268664704, "grad_norm": 19.127355575561523, "learning_rate": 5.295691731335295e-07, "loss": 0.5666, "step": 3516 }, { "epoch": 0.47056462402997057, "grad_norm": 18.88753318786621, "learning_rate": 5.294353759700294e-07, "loss": 0.4624, "step": 3517 }, { "epoch": 0.4706984211934707, "grad_norm": 17.62274742126465, "learning_rate": 5.293015788065293e-07, "loss": 0.5391, "step": 3518 }, { "epoch": 0.4708322183569708, "grad_norm": 28.318923950195312, "learning_rate": 5.291677816430292e-07, "loss": 0.5042, "step": 3519 }, { "epoch": 0.47096601552047096, "grad_norm": 24.544193267822266, "learning_rate": 5.290339844795289e-07, "loss": 0.5069, "step": 3520 }, { "epoch": 0.4710998126839711, "grad_norm": 35.27249526977539, "learning_rate": 5.289001873160289e-07, "loss": 0.5188, "step": 3521 }, { "epoch": 0.47123360984747126, "grad_norm": 23.520793914794922, "learning_rate": 5.287663901525288e-07, "loss": 0.7164, "step": 3522 }, { "epoch": 0.47136740701097135, "grad_norm": 34.95111846923828, "learning_rate": 5.286325929890287e-07, "loss": 0.4675, "step": 3523 }, { "epoch": 0.4715012041744715, "grad_norm": 20.377784729003906, "learning_rate": 5.284987958255284e-07, "loss": 0.3154, "step": 3524 }, { "epoch": 0.47163500133797165, "grad_norm": 29.783424377441406, "learning_rate": 5.283649986620283e-07, "loss": 0.5955, "step": 3525 }, { "epoch": 0.47176879850147174, "grad_norm": 12.27181339263916, "learning_rate": 5.282312014985282e-07, "loss": 0.3616, "step": 3526 }, { "epoch": 0.4719025956649719, "grad_norm": 14.174753189086914, "learning_rate": 5.28097404335028e-07, "loss": 0.4532, "step": 3527 }, { "epoch": 0.47203639282847204, "grad_norm": 40.14020538330078, "learning_rate": 5.27963607171528e-07, "loss": 0.6292, "step": 3528 }, { "epoch": 0.4721701899919722, "grad_norm": 29.447311401367188, "learning_rate": 5.278298100080278e-07, "loss": 0.5863, "step": 3529 }, { "epoch": 0.4723039871554723, "grad_norm": 13.854663848876953, "learning_rate": 5.276960128445277e-07, "loss": 0.3093, "step": 3530 }, { "epoch": 0.47243778431897243, "grad_norm": 15.16612720489502, "learning_rate": 5.275622156810275e-07, "loss": 0.4438, "step": 3531 }, { "epoch": 0.4725715814824726, "grad_norm": 18.46872329711914, "learning_rate": 5.274284185175274e-07, "loss": 0.2948, "step": 3532 }, { "epoch": 0.47270537864597273, "grad_norm": 14.110600471496582, "learning_rate": 5.272946213540273e-07, "loss": 0.2645, "step": 3533 }, { "epoch": 0.4728391758094728, "grad_norm": 14.964689254760742, "learning_rate": 5.271608241905271e-07, "loss": 0.3801, "step": 3534 }, { "epoch": 0.47297297297297297, "grad_norm": 13.707052230834961, "learning_rate": 5.270270270270269e-07, "loss": 0.3396, "step": 3535 }, { "epoch": 0.4731067701364731, "grad_norm": 16.128089904785156, "learning_rate": 5.268932298635269e-07, "loss": 0.4136, "step": 3536 }, { "epoch": 0.4732405672999732, "grad_norm": 17.09556007385254, "learning_rate": 5.267594327000268e-07, "loss": 0.4363, "step": 3537 }, { "epoch": 0.47337436446347336, "grad_norm": 16.312803268432617, "learning_rate": 5.266256355365266e-07, "loss": 0.117, "step": 3538 }, { "epoch": 0.4735081616269735, "grad_norm": 25.642724990844727, "learning_rate": 5.264918383730264e-07, "loss": 0.4318, "step": 3539 }, { "epoch": 0.47364195879047366, "grad_norm": 41.7197151184082, "learning_rate": 5.263580412095263e-07, "loss": 0.6046, "step": 3540 }, { "epoch": 0.47377575595397375, "grad_norm": 44.96480178833008, "learning_rate": 5.262242440460262e-07, "loss": 0.6234, "step": 3541 }, { "epoch": 0.4739095531174739, "grad_norm": 14.965741157531738, "learning_rate": 5.260904468825261e-07, "loss": 0.3144, "step": 3542 }, { "epoch": 0.47404335028097405, "grad_norm": 25.54220962524414, "learning_rate": 5.259566497190259e-07, "loss": 0.3131, "step": 3543 }, { "epoch": 0.4741771474444742, "grad_norm": 28.08452796936035, "learning_rate": 5.258228525555258e-07, "loss": 0.4424, "step": 3544 }, { "epoch": 0.4743109446079743, "grad_norm": 19.997753143310547, "learning_rate": 5.256890553920257e-07, "loss": 0.1257, "step": 3545 }, { "epoch": 0.47444474177147444, "grad_norm": 19.012643814086914, "learning_rate": 5.255552582285256e-07, "loss": 0.3667, "step": 3546 }, { "epoch": 0.4745785389349746, "grad_norm": 10.472304344177246, "learning_rate": 5.254214610650254e-07, "loss": 0.2152, "step": 3547 }, { "epoch": 0.4747123360984747, "grad_norm": 22.551767349243164, "learning_rate": 5.252876639015252e-07, "loss": 0.3963, "step": 3548 }, { "epoch": 0.47484613326197483, "grad_norm": 31.489660263061523, "learning_rate": 5.251538667380251e-07, "loss": 0.6654, "step": 3549 }, { "epoch": 0.474979930425475, "grad_norm": 35.85904312133789, "learning_rate": 5.250200695745251e-07, "loss": 0.2906, "step": 3550 }, { "epoch": 0.47511372758897513, "grad_norm": 19.841243743896484, "learning_rate": 5.248862724110249e-07, "loss": 0.5329, "step": 3551 }, { "epoch": 0.4752475247524752, "grad_norm": 32.574642181396484, "learning_rate": 5.247524752475247e-07, "loss": 0.5688, "step": 3552 }, { "epoch": 0.4753813219159754, "grad_norm": 29.103256225585938, "learning_rate": 5.246186780840246e-07, "loss": 0.7768, "step": 3553 }, { "epoch": 0.4755151190794755, "grad_norm": 45.825706481933594, "learning_rate": 5.244848809205244e-07, "loss": 0.5803, "step": 3554 }, { "epoch": 0.4756489162429757, "grad_norm": 45.573020935058594, "learning_rate": 5.243510837570243e-07, "loss": 0.5894, "step": 3555 }, { "epoch": 0.47578271340647577, "grad_norm": 21.69307518005371, "learning_rate": 5.242172865935242e-07, "loss": 0.4402, "step": 3556 }, { "epoch": 0.4759165105699759, "grad_norm": 25.424959182739258, "learning_rate": 5.24083489430024e-07, "loss": 0.7158, "step": 3557 }, { "epoch": 0.47605030773347606, "grad_norm": 14.896915435791016, "learning_rate": 5.239496922665239e-07, "loss": 0.3094, "step": 3558 }, { "epoch": 0.47618410489697616, "grad_norm": 24.107919692993164, "learning_rate": 5.238158951030238e-07, "loss": 0.4342, "step": 3559 }, { "epoch": 0.4763179020604763, "grad_norm": 25.378541946411133, "learning_rate": 5.236820979395237e-07, "loss": 0.5474, "step": 3560 }, { "epoch": 0.47645169922397645, "grad_norm": 15.60404109954834, "learning_rate": 5.235483007760236e-07, "loss": 0.4588, "step": 3561 }, { "epoch": 0.4765854963874766, "grad_norm": 19.839181900024414, "learning_rate": 5.234145036125233e-07, "loss": 0.3569, "step": 3562 }, { "epoch": 0.4767192935509767, "grad_norm": 15.373507499694824, "learning_rate": 5.232807064490232e-07, "loss": 0.5529, "step": 3563 }, { "epoch": 0.47685309071447685, "grad_norm": 20.21479606628418, "learning_rate": 5.231469092855231e-07, "loss": 0.5207, "step": 3564 }, { "epoch": 0.476986887877977, "grad_norm": 31.27855110168457, "learning_rate": 5.230131121220231e-07, "loss": 0.3648, "step": 3565 }, { "epoch": 0.47712068504147714, "grad_norm": 34.53052520751953, "learning_rate": 5.228793149585228e-07, "loss": 0.5125, "step": 3566 }, { "epoch": 0.47725448220497724, "grad_norm": 22.308034896850586, "learning_rate": 5.227455177950227e-07, "loss": 0.3426, "step": 3567 }, { "epoch": 0.4773882793684774, "grad_norm": 23.615219116210938, "learning_rate": 5.226117206315226e-07, "loss": 0.3251, "step": 3568 }, { "epoch": 0.47752207653197754, "grad_norm": 22.006084442138672, "learning_rate": 5.224779234680225e-07, "loss": 0.3602, "step": 3569 }, { "epoch": 0.47765587369547763, "grad_norm": 21.358165740966797, "learning_rate": 5.223441263045223e-07, "loss": 0.593, "step": 3570 }, { "epoch": 0.4777896708589778, "grad_norm": 34.412654876708984, "learning_rate": 5.222103291410221e-07, "loss": 0.5239, "step": 3571 }, { "epoch": 0.4779234680224779, "grad_norm": 35.88241958618164, "learning_rate": 5.220765319775221e-07, "loss": 0.3319, "step": 3572 }, { "epoch": 0.4780572651859781, "grad_norm": 27.264570236206055, "learning_rate": 5.21942734814022e-07, "loss": 0.4605, "step": 3573 }, { "epoch": 0.47819106234947817, "grad_norm": 22.016122817993164, "learning_rate": 5.218089376505218e-07, "loss": 0.5588, "step": 3574 }, { "epoch": 0.4783248595129783, "grad_norm": 48.767391204833984, "learning_rate": 5.216751404870217e-07, "loss": 0.4917, "step": 3575 }, { "epoch": 0.47845865667647847, "grad_norm": 20.39612579345703, "learning_rate": 5.215413433235215e-07, "loss": 0.3967, "step": 3576 }, { "epoch": 0.4785924538399786, "grad_norm": 24.177051544189453, "learning_rate": 5.214075461600214e-07, "loss": 0.4576, "step": 3577 }, { "epoch": 0.4787262510034787, "grad_norm": 19.71828842163086, "learning_rate": 5.212737489965212e-07, "loss": 0.5529, "step": 3578 }, { "epoch": 0.47886004816697886, "grad_norm": 38.91845703125, "learning_rate": 5.211399518330211e-07, "loss": 0.4823, "step": 3579 }, { "epoch": 0.478993845330479, "grad_norm": 34.89849853515625, "learning_rate": 5.21006154669521e-07, "loss": 0.3833, "step": 3580 }, { "epoch": 0.4791276424939791, "grad_norm": 41.33415985107422, "learning_rate": 5.208723575060209e-07, "loss": 0.4178, "step": 3581 }, { "epoch": 0.47926143965747925, "grad_norm": 19.84844207763672, "learning_rate": 5.207385603425207e-07, "loss": 0.5875, "step": 3582 }, { "epoch": 0.4793952368209794, "grad_norm": 35.71472930908203, "learning_rate": 5.206047631790206e-07, "loss": 0.419, "step": 3583 }, { "epoch": 0.47952903398447955, "grad_norm": 22.615278244018555, "learning_rate": 5.204709660155205e-07, "loss": 0.4832, "step": 3584 }, { "epoch": 0.47966283114797964, "grad_norm": 20.58814239501953, "learning_rate": 5.203371688520202e-07, "loss": 0.3164, "step": 3585 }, { "epoch": 0.4797966283114798, "grad_norm": 32.70486831665039, "learning_rate": 5.202033716885201e-07, "loss": 0.3505, "step": 3586 }, { "epoch": 0.47993042547497994, "grad_norm": 17.202499389648438, "learning_rate": 5.200695745250201e-07, "loss": 0.5472, "step": 3587 }, { "epoch": 0.4800642226384801, "grad_norm": 15.588428497314453, "learning_rate": 5.1993577736152e-07, "loss": 0.3601, "step": 3588 }, { "epoch": 0.4801980198019802, "grad_norm": 11.234332084655762, "learning_rate": 5.198019801980198e-07, "loss": 0.302, "step": 3589 }, { "epoch": 0.48033181696548033, "grad_norm": 43.0868034362793, "learning_rate": 5.196681830345196e-07, "loss": 0.5592, "step": 3590 }, { "epoch": 0.4804656141289805, "grad_norm": 21.333803176879883, "learning_rate": 5.195343858710195e-07, "loss": 0.5317, "step": 3591 }, { "epoch": 0.48059941129248057, "grad_norm": 24.66790771484375, "learning_rate": 5.194005887075194e-07, "loss": 0.4418, "step": 3592 }, { "epoch": 0.4807332084559807, "grad_norm": 26.793119430541992, "learning_rate": 5.192667915440192e-07, "loss": 0.3887, "step": 3593 }, { "epoch": 0.48086700561948087, "grad_norm": 25.560972213745117, "learning_rate": 5.191329943805191e-07, "loss": 0.5164, "step": 3594 }, { "epoch": 0.481000802782981, "grad_norm": 12.44154167175293, "learning_rate": 5.18999197217019e-07, "loss": 0.3925, "step": 3595 }, { "epoch": 0.4811345999464811, "grad_norm": 16.504562377929688, "learning_rate": 5.188654000535189e-07, "loss": 0.4351, "step": 3596 }, { "epoch": 0.48126839710998126, "grad_norm": 40.694664001464844, "learning_rate": 5.187316028900187e-07, "loss": 0.5724, "step": 3597 }, { "epoch": 0.4814021942734814, "grad_norm": 27.797595977783203, "learning_rate": 5.185978057265186e-07, "loss": 0.6261, "step": 3598 }, { "epoch": 0.48153599143698156, "grad_norm": 27.444543838500977, "learning_rate": 5.184640085630184e-07, "loss": 0.3781, "step": 3599 }, { "epoch": 0.48166978860048165, "grad_norm": 12.861449241638184, "learning_rate": 5.183302113995183e-07, "loss": 0.2635, "step": 3600 }, { "epoch": 0.4818035857639818, "grad_norm": 17.606483459472656, "learning_rate": 5.181964142360181e-07, "loss": 0.4407, "step": 3601 }, { "epoch": 0.48193738292748195, "grad_norm": 23.376466751098633, "learning_rate": 5.180626170725181e-07, "loss": 0.4454, "step": 3602 }, { "epoch": 0.4820711800909821, "grad_norm": 40.641151428222656, "learning_rate": 5.17928819909018e-07, "loss": 0.5843, "step": 3603 }, { "epoch": 0.4822049772544822, "grad_norm": 23.80019760131836, "learning_rate": 5.177950227455178e-07, "loss": 0.4109, "step": 3604 }, { "epoch": 0.48233877441798234, "grad_norm": 16.187843322753906, "learning_rate": 5.176612255820176e-07, "loss": 0.4198, "step": 3605 }, { "epoch": 0.4824725715814825, "grad_norm": 25.287086486816406, "learning_rate": 5.175274284185175e-07, "loss": 0.4189, "step": 3606 }, { "epoch": 0.4826063687449826, "grad_norm": 19.03280258178711, "learning_rate": 5.173936312550174e-07, "loss": 0.2833, "step": 3607 }, { "epoch": 0.48274016590848273, "grad_norm": 14.116992950439453, "learning_rate": 5.172598340915171e-07, "loss": 0.3686, "step": 3608 }, { "epoch": 0.4828739630719829, "grad_norm": 20.873199462890625, "learning_rate": 5.171260369280171e-07, "loss": 0.3071, "step": 3609 }, { "epoch": 0.48300776023548303, "grad_norm": 14.210335731506348, "learning_rate": 5.16992239764517e-07, "loss": 0.4441, "step": 3610 }, { "epoch": 0.4831415573989831, "grad_norm": 17.43087387084961, "learning_rate": 5.168584426010169e-07, "loss": 0.5084, "step": 3611 }, { "epoch": 0.4832753545624833, "grad_norm": 16.71880340576172, "learning_rate": 5.167246454375167e-07, "loss": 0.3535, "step": 3612 }, { "epoch": 0.4834091517259834, "grad_norm": 18.944520950317383, "learning_rate": 5.165908482740165e-07, "loss": 0.3472, "step": 3613 }, { "epoch": 0.48354294888948357, "grad_norm": 23.549148559570312, "learning_rate": 5.164570511105164e-07, "loss": 0.4277, "step": 3614 }, { "epoch": 0.48367674605298366, "grad_norm": 16.823484420776367, "learning_rate": 5.163232539470163e-07, "loss": 0.3816, "step": 3615 }, { "epoch": 0.4838105432164838, "grad_norm": 14.869661331176758, "learning_rate": 5.161894567835162e-07, "loss": 0.373, "step": 3616 }, { "epoch": 0.48394434037998396, "grad_norm": 22.886274337768555, "learning_rate": 5.160556596200161e-07, "loss": 0.4658, "step": 3617 }, { "epoch": 0.48407813754348405, "grad_norm": 14.556363105773926, "learning_rate": 5.159218624565159e-07, "loss": 0.258, "step": 3618 }, { "epoch": 0.4842119347069842, "grad_norm": 25.95209312438965, "learning_rate": 5.157880652930158e-07, "loss": 0.6458, "step": 3619 }, { "epoch": 0.48434573187048435, "grad_norm": 18.47677230834961, "learning_rate": 5.156542681295156e-07, "loss": 0.5855, "step": 3620 }, { "epoch": 0.4844795290339845, "grad_norm": 32.954734802246094, "learning_rate": 5.155204709660155e-07, "loss": 0.5034, "step": 3621 }, { "epoch": 0.4846133261974846, "grad_norm": 15.05030345916748, "learning_rate": 5.153866738025153e-07, "loss": 0.2057, "step": 3622 }, { "epoch": 0.48474712336098474, "grad_norm": 18.96415901184082, "learning_rate": 5.152528766390152e-07, "loss": 0.5013, "step": 3623 }, { "epoch": 0.4848809205244849, "grad_norm": 30.092784881591797, "learning_rate": 5.151190794755151e-07, "loss": 0.5226, "step": 3624 }, { "epoch": 0.48501471768798504, "grad_norm": 25.75876808166504, "learning_rate": 5.14985282312015e-07, "loss": 0.3399, "step": 3625 }, { "epoch": 0.48514851485148514, "grad_norm": 30.330455780029297, "learning_rate": 5.148514851485149e-07, "loss": 0.6463, "step": 3626 }, { "epoch": 0.4852823120149853, "grad_norm": 17.33490562438965, "learning_rate": 5.147176879850147e-07, "loss": 0.3632, "step": 3627 }, { "epoch": 0.48541610917848543, "grad_norm": 21.31711196899414, "learning_rate": 5.145838908215145e-07, "loss": 0.3014, "step": 3628 }, { "epoch": 0.4855499063419855, "grad_norm": 23.952255249023438, "learning_rate": 5.144500936580144e-07, "loss": 0.6363, "step": 3629 }, { "epoch": 0.4856837035054857, "grad_norm": 19.516033172607422, "learning_rate": 5.143162964945143e-07, "loss": 0.4719, "step": 3630 }, { "epoch": 0.4858175006689858, "grad_norm": 22.321821212768555, "learning_rate": 5.141824993310143e-07, "loss": 0.4141, "step": 3631 }, { "epoch": 0.485951297832486, "grad_norm": 14.854228973388672, "learning_rate": 5.14048702167514e-07, "loss": 0.3353, "step": 3632 }, { "epoch": 0.48608509499598607, "grad_norm": 40.52544021606445, "learning_rate": 5.139149050040139e-07, "loss": 0.4807, "step": 3633 }, { "epoch": 0.4862188921594862, "grad_norm": 19.75444984436035, "learning_rate": 5.137811078405138e-07, "loss": 0.4507, "step": 3634 }, { "epoch": 0.48635268932298636, "grad_norm": 39.700836181640625, "learning_rate": 5.136473106770136e-07, "loss": 0.4745, "step": 3635 }, { "epoch": 0.4864864864864865, "grad_norm": 18.078779220581055, "learning_rate": 5.135135135135134e-07, "loss": 0.2765, "step": 3636 }, { "epoch": 0.4866202836499866, "grad_norm": 27.876569747924805, "learning_rate": 5.133797163500133e-07, "loss": 0.541, "step": 3637 }, { "epoch": 0.48675408081348676, "grad_norm": 28.996240615844727, "learning_rate": 5.132459191865133e-07, "loss": 0.3332, "step": 3638 }, { "epoch": 0.4868878779769869, "grad_norm": 18.95992660522461, "learning_rate": 5.131121220230131e-07, "loss": 0.4258, "step": 3639 }, { "epoch": 0.487021675140487, "grad_norm": 21.5943546295166, "learning_rate": 5.12978324859513e-07, "loss": 0.2939, "step": 3640 }, { "epoch": 0.48715547230398715, "grad_norm": 24.689329147338867, "learning_rate": 5.128445276960128e-07, "loss": 0.5137, "step": 3641 }, { "epoch": 0.4872892694674873, "grad_norm": 15.487906455993652, "learning_rate": 5.127107305325127e-07, "loss": 0.2503, "step": 3642 }, { "epoch": 0.48742306663098744, "grad_norm": 44.08299255371094, "learning_rate": 5.125769333690125e-07, "loss": 0.5551, "step": 3643 }, { "epoch": 0.48755686379448754, "grad_norm": 22.144899368286133, "learning_rate": 5.124431362055124e-07, "loss": 0.408, "step": 3644 }, { "epoch": 0.4876906609579877, "grad_norm": 30.818130493164062, "learning_rate": 5.123093390420122e-07, "loss": 0.4427, "step": 3645 }, { "epoch": 0.48782445812148784, "grad_norm": 16.11687469482422, "learning_rate": 5.121755418785122e-07, "loss": 0.3894, "step": 3646 }, { "epoch": 0.487958255284988, "grad_norm": 38.672584533691406, "learning_rate": 5.12041744715012e-07, "loss": 0.6262, "step": 3647 }, { "epoch": 0.4880920524484881, "grad_norm": 14.008149147033691, "learning_rate": 5.119079475515119e-07, "loss": 0.2387, "step": 3648 }, { "epoch": 0.4882258496119882, "grad_norm": 9.988741874694824, "learning_rate": 5.117741503880118e-07, "loss": 0.2844, "step": 3649 }, { "epoch": 0.4883596467754884, "grad_norm": 21.84697914123535, "learning_rate": 5.116403532245116e-07, "loss": 0.3034, "step": 3650 }, { "epoch": 0.48849344393898847, "grad_norm": 32.66492462158203, "learning_rate": 5.115065560610114e-07, "loss": 0.4835, "step": 3651 }, { "epoch": 0.4886272411024886, "grad_norm": 16.38551139831543, "learning_rate": 5.113727588975113e-07, "loss": 0.345, "step": 3652 }, { "epoch": 0.48876103826598877, "grad_norm": 18.341394424438477, "learning_rate": 5.112389617340113e-07, "loss": 0.4671, "step": 3653 }, { "epoch": 0.4888948354294889, "grad_norm": 25.698680877685547, "learning_rate": 5.111051645705112e-07, "loss": 0.5013, "step": 3654 }, { "epoch": 0.489028632592989, "grad_norm": 19.55909538269043, "learning_rate": 5.109713674070109e-07, "loss": 0.3632, "step": 3655 }, { "epoch": 0.48916242975648916, "grad_norm": 24.84797477722168, "learning_rate": 5.108375702435108e-07, "loss": 0.4735, "step": 3656 }, { "epoch": 0.4892962269199893, "grad_norm": 27.644044876098633, "learning_rate": 5.107037730800107e-07, "loss": 0.4466, "step": 3657 }, { "epoch": 0.48943002408348946, "grad_norm": 20.977550506591797, "learning_rate": 5.105699759165105e-07, "loss": 0.2209, "step": 3658 }, { "epoch": 0.48956382124698955, "grad_norm": 24.113561630249023, "learning_rate": 5.104361787530103e-07, "loss": 0.4293, "step": 3659 }, { "epoch": 0.4896976184104897, "grad_norm": 26.128957748413086, "learning_rate": 5.103023815895103e-07, "loss": 0.5248, "step": 3660 }, { "epoch": 0.48983141557398985, "grad_norm": 22.252119064331055, "learning_rate": 5.101685844260102e-07, "loss": 0.4127, "step": 3661 }, { "epoch": 0.48996521273748994, "grad_norm": 16.550739288330078, "learning_rate": 5.1003478726251e-07, "loss": 0.3146, "step": 3662 }, { "epoch": 0.4900990099009901, "grad_norm": 24.540603637695312, "learning_rate": 5.099009900990099e-07, "loss": 0.2234, "step": 3663 }, { "epoch": 0.49023280706449024, "grad_norm": 23.143768310546875, "learning_rate": 5.097671929355097e-07, "loss": 0.3848, "step": 3664 }, { "epoch": 0.4903666042279904, "grad_norm": 25.074613571166992, "learning_rate": 5.096333957720096e-07, "loss": 0.4414, "step": 3665 }, { "epoch": 0.4905004013914905, "grad_norm": 29.66124153137207, "learning_rate": 5.094995986085094e-07, "loss": 0.8146, "step": 3666 }, { "epoch": 0.49063419855499063, "grad_norm": 24.343515396118164, "learning_rate": 5.093658014450093e-07, "loss": 0.3034, "step": 3667 }, { "epoch": 0.4907679957184908, "grad_norm": 18.970895767211914, "learning_rate": 5.092320042815093e-07, "loss": 0.337, "step": 3668 }, { "epoch": 0.49090179288199093, "grad_norm": 13.15945053100586, "learning_rate": 5.090982071180091e-07, "loss": 0.2915, "step": 3669 }, { "epoch": 0.491035590045491, "grad_norm": 35.76084899902344, "learning_rate": 5.089644099545089e-07, "loss": 0.6747, "step": 3670 }, { "epoch": 0.49116938720899117, "grad_norm": 43.12398910522461, "learning_rate": 5.088306127910088e-07, "loss": 0.5981, "step": 3671 }, { "epoch": 0.4913031843724913, "grad_norm": 15.683609008789062, "learning_rate": 5.086968156275087e-07, "loss": 0.2841, "step": 3672 }, { "epoch": 0.4914369815359914, "grad_norm": 20.024215698242188, "learning_rate": 5.085630184640086e-07, "loss": 0.4703, "step": 3673 }, { "epoch": 0.49157077869949156, "grad_norm": 20.016462326049805, "learning_rate": 5.084292213005083e-07, "loss": 0.3019, "step": 3674 }, { "epoch": 0.4917045758629917, "grad_norm": 29.113615036010742, "learning_rate": 5.082954241370083e-07, "loss": 0.4119, "step": 3675 }, { "epoch": 0.49183837302649186, "grad_norm": 30.468034744262695, "learning_rate": 5.081616269735082e-07, "loss": 0.5285, "step": 3676 }, { "epoch": 0.49197217018999195, "grad_norm": 32.66025924682617, "learning_rate": 5.080278298100081e-07, "loss": 0.3979, "step": 3677 }, { "epoch": 0.4921059673534921, "grad_norm": 23.553436279296875, "learning_rate": 5.078940326465078e-07, "loss": 0.1427, "step": 3678 }, { "epoch": 0.49223976451699225, "grad_norm": 36.06169128417969, "learning_rate": 5.077602354830077e-07, "loss": 0.5999, "step": 3679 }, { "epoch": 0.4923735616804924, "grad_norm": 29.997909545898438, "learning_rate": 5.076264383195076e-07, "loss": 0.6161, "step": 3680 }, { "epoch": 0.4925073588439925, "grad_norm": 22.925416946411133, "learning_rate": 5.074926411560075e-07, "loss": 0.2644, "step": 3681 }, { "epoch": 0.49264115600749264, "grad_norm": 27.756549835205078, "learning_rate": 5.073588439925074e-07, "loss": 0.5409, "step": 3682 }, { "epoch": 0.4927749531709928, "grad_norm": 15.055912971496582, "learning_rate": 5.072250468290072e-07, "loss": 0.1778, "step": 3683 }, { "epoch": 0.4929087503344929, "grad_norm": 51.09224319458008, "learning_rate": 5.070912496655071e-07, "loss": 0.7286, "step": 3684 }, { "epoch": 0.49304254749799303, "grad_norm": 17.101850509643555, "learning_rate": 5.06957452502007e-07, "loss": 0.4737, "step": 3685 }, { "epoch": 0.4931763446614932, "grad_norm": 25.484737396240234, "learning_rate": 5.068236553385068e-07, "loss": 0.5075, "step": 3686 }, { "epoch": 0.49331014182499333, "grad_norm": 10.008910179138184, "learning_rate": 5.066898581750066e-07, "loss": 0.1563, "step": 3687 }, { "epoch": 0.4934439389884934, "grad_norm": 14.738155364990234, "learning_rate": 5.065560610115065e-07, "loss": 0.4124, "step": 3688 }, { "epoch": 0.4935777361519936, "grad_norm": 37.02375793457031, "learning_rate": 5.064222638480063e-07, "loss": 0.6565, "step": 3689 }, { "epoch": 0.4937115333154937, "grad_norm": 23.669330596923828, "learning_rate": 5.062884666845063e-07, "loss": 0.3067, "step": 3690 }, { "epoch": 0.49384533047899387, "grad_norm": 27.9422664642334, "learning_rate": 5.061546695210062e-07, "loss": 0.4781, "step": 3691 }, { "epoch": 0.49397912764249396, "grad_norm": 19.136018753051758, "learning_rate": 5.06020872357506e-07, "loss": 0.4114, "step": 3692 }, { "epoch": 0.4941129248059941, "grad_norm": 19.561464309692383, "learning_rate": 5.058870751940058e-07, "loss": 0.3577, "step": 3693 }, { "epoch": 0.49424672196949426, "grad_norm": 31.539432525634766, "learning_rate": 5.057532780305057e-07, "loss": 0.4193, "step": 3694 }, { "epoch": 0.49438051913299436, "grad_norm": 34.058753967285156, "learning_rate": 5.056194808670056e-07, "loss": 0.6248, "step": 3695 }, { "epoch": 0.4945143162964945, "grad_norm": 15.199077606201172, "learning_rate": 5.054856837035055e-07, "loss": 0.4025, "step": 3696 }, { "epoch": 0.49464811345999465, "grad_norm": 17.029787063598633, "learning_rate": 5.053518865400053e-07, "loss": 0.4667, "step": 3697 }, { "epoch": 0.4947819106234948, "grad_norm": 32.500083923339844, "learning_rate": 5.052180893765052e-07, "loss": 0.3661, "step": 3698 }, { "epoch": 0.4949157077869949, "grad_norm": 29.44215202331543, "learning_rate": 5.050842922130051e-07, "loss": 0.7646, "step": 3699 }, { "epoch": 0.49504950495049505, "grad_norm": 14.35242748260498, "learning_rate": 5.04950495049505e-07, "loss": 0.3856, "step": 3700 }, { "epoch": 0.4951833021139952, "grad_norm": 18.104761123657227, "learning_rate": 5.048166978860047e-07, "loss": 0.5351, "step": 3701 }, { "epoch": 0.49531709927749534, "grad_norm": 18.99884605407715, "learning_rate": 5.046829007225046e-07, "loss": 0.4499, "step": 3702 }, { "epoch": 0.49545089644099544, "grad_norm": 39.56483840942383, "learning_rate": 5.045491035590045e-07, "loss": 0.3986, "step": 3703 }, { "epoch": 0.4955846936044956, "grad_norm": 15.128981590270996, "learning_rate": 5.044153063955045e-07, "loss": 0.4304, "step": 3704 }, { "epoch": 0.49571849076799573, "grad_norm": 27.8256778717041, "learning_rate": 5.042815092320043e-07, "loss": 0.3714, "step": 3705 }, { "epoch": 0.4958522879314958, "grad_norm": 30.16381072998047, "learning_rate": 5.041477120685041e-07, "loss": 0.5678, "step": 3706 }, { "epoch": 0.495986085094996, "grad_norm": 15.008970260620117, "learning_rate": 5.04013914905004e-07, "loss": 0.4412, "step": 3707 }, { "epoch": 0.4961198822584961, "grad_norm": 20.75800132751465, "learning_rate": 5.038801177415039e-07, "loss": 0.4703, "step": 3708 }, { "epoch": 0.4962536794219963, "grad_norm": 21.737564086914062, "learning_rate": 5.037463205780037e-07, "loss": 0.2194, "step": 3709 }, { "epoch": 0.49638747658549637, "grad_norm": 11.711509704589844, "learning_rate": 5.036125234145036e-07, "loss": 0.4053, "step": 3710 }, { "epoch": 0.4965212737489965, "grad_norm": 11.930798530578613, "learning_rate": 5.034787262510034e-07, "loss": 0.4103, "step": 3711 }, { "epoch": 0.49665507091249667, "grad_norm": 43.667205810546875, "learning_rate": 5.033449290875034e-07, "loss": 0.6941, "step": 3712 }, { "epoch": 0.4967888680759968, "grad_norm": 23.765548706054688, "learning_rate": 5.032111319240032e-07, "loss": 0.4979, "step": 3713 }, { "epoch": 0.4969226652394969, "grad_norm": 28.79509735107422, "learning_rate": 5.030773347605031e-07, "loss": 0.6146, "step": 3714 }, { "epoch": 0.49705646240299706, "grad_norm": 10.510614395141602, "learning_rate": 5.02943537597003e-07, "loss": 0.3788, "step": 3715 }, { "epoch": 0.4971902595664972, "grad_norm": 20.096017837524414, "learning_rate": 5.028097404335027e-07, "loss": 0.2786, "step": 3716 }, { "epoch": 0.4973240567299973, "grad_norm": 16.32339859008789, "learning_rate": 5.026759432700026e-07, "loss": 0.5015, "step": 3717 }, { "epoch": 0.49745785389349745, "grad_norm": 25.49803924560547, "learning_rate": 5.025421461065025e-07, "loss": 0.3902, "step": 3718 }, { "epoch": 0.4975916510569976, "grad_norm": 20.43900489807129, "learning_rate": 5.024083489430025e-07, "loss": 0.5349, "step": 3719 }, { "epoch": 0.49772544822049775, "grad_norm": 21.40087127685547, "learning_rate": 5.022745517795022e-07, "loss": 0.5854, "step": 3720 }, { "epoch": 0.49785924538399784, "grad_norm": 21.87727928161621, "learning_rate": 5.021407546160021e-07, "loss": 0.5038, "step": 3721 }, { "epoch": 0.497993042547498, "grad_norm": 31.464839935302734, "learning_rate": 5.02006957452502e-07, "loss": 0.4772, "step": 3722 }, { "epoch": 0.49812683971099814, "grad_norm": 17.71463394165039, "learning_rate": 5.018731602890019e-07, "loss": 0.4561, "step": 3723 }, { "epoch": 0.4982606368744983, "grad_norm": 19.746585845947266, "learning_rate": 5.017393631255017e-07, "loss": 0.3469, "step": 3724 }, { "epoch": 0.4983944340379984, "grad_norm": 15.174617767333984, "learning_rate": 5.016055659620015e-07, "loss": 0.4444, "step": 3725 }, { "epoch": 0.49852823120149853, "grad_norm": 25.074993133544922, "learning_rate": 5.014717687985015e-07, "loss": 0.356, "step": 3726 }, { "epoch": 0.4986620283649987, "grad_norm": 25.14922523498535, "learning_rate": 5.013379716350014e-07, "loss": 0.4641, "step": 3727 }, { "epoch": 0.49879582552849877, "grad_norm": 21.665977478027344, "learning_rate": 5.012041744715012e-07, "loss": 0.3807, "step": 3728 }, { "epoch": 0.4989296226919989, "grad_norm": 30.378116607666016, "learning_rate": 5.01070377308001e-07, "loss": 0.6117, "step": 3729 }, { "epoch": 0.49906341985549907, "grad_norm": 27.89266586303711, "learning_rate": 5.009365801445009e-07, "loss": 0.4906, "step": 3730 }, { "epoch": 0.4991972170189992, "grad_norm": 16.075977325439453, "learning_rate": 5.008027829810008e-07, "loss": 0.4234, "step": 3731 }, { "epoch": 0.4993310141824993, "grad_norm": 14.641469955444336, "learning_rate": 5.006689858175006e-07, "loss": 0.4633, "step": 3732 }, { "epoch": 0.49946481134599946, "grad_norm": 32.62420654296875, "learning_rate": 5.005351886540005e-07, "loss": 0.5246, "step": 3733 }, { "epoch": 0.4995986085094996, "grad_norm": 12.316533088684082, "learning_rate": 5.004013914905004e-07, "loss": 0.4563, "step": 3734 }, { "epoch": 0.49973240567299976, "grad_norm": 22.943038940429688, "learning_rate": 5.002675943270003e-07, "loss": 0.3561, "step": 3735 }, { "epoch": 0.49986620283649985, "grad_norm": 13.77337646484375, "learning_rate": 5.001337971635001e-07, "loss": 0.2935, "step": 3736 }, { "epoch": 0.5, "grad_norm": 25.846773147583008, "learning_rate": 5e-07, "loss": 0.3831, "step": 3737 }, { "epoch": 0.5001337971635001, "grad_norm": 30.69327735900879, "learning_rate": 4.998662028364999e-07, "loss": 0.4808, "step": 3738 }, { "epoch": 0.5002675943270003, "grad_norm": 24.043336868286133, "learning_rate": 4.997324056729997e-07, "loss": 0.623, "step": 3739 }, { "epoch": 0.5004013914905004, "grad_norm": 40.04366683959961, "learning_rate": 4.995986085094996e-07, "loss": 0.4163, "step": 3740 }, { "epoch": 0.5005351886540005, "grad_norm": 25.884597778320312, "learning_rate": 4.994648113459994e-07, "loss": 0.4819, "step": 3741 }, { "epoch": 0.5006689858175006, "grad_norm": 18.02483367919922, "learning_rate": 4.993310141824993e-07, "loss": 0.4182, "step": 3742 }, { "epoch": 0.5008027829810008, "grad_norm": 29.845598220825195, "learning_rate": 4.991972170189991e-07, "loss": 0.3585, "step": 3743 }, { "epoch": 0.5009365801445009, "grad_norm": 26.576461791992188, "learning_rate": 4.99063419855499e-07, "loss": 0.2438, "step": 3744 }, { "epoch": 0.5010703773080011, "grad_norm": 31.425527572631836, "learning_rate": 4.989296226919989e-07, "loss": 0.5208, "step": 3745 }, { "epoch": 0.5012041744715012, "grad_norm": 16.585834503173828, "learning_rate": 4.987958255284988e-07, "loss": 0.3479, "step": 3746 }, { "epoch": 0.5013379716350014, "grad_norm": 26.51902198791504, "learning_rate": 4.986620283649987e-07, "loss": 0.4422, "step": 3747 }, { "epoch": 0.5014717687985015, "grad_norm": 10.418330192565918, "learning_rate": 4.985282312014985e-07, "loss": 0.2772, "step": 3748 }, { "epoch": 0.5016055659620016, "grad_norm": 43.72157287597656, "learning_rate": 4.983944340379983e-07, "loss": 0.5705, "step": 3749 }, { "epoch": 0.5017393631255017, "grad_norm": 29.717771530151367, "learning_rate": 4.982606368744983e-07, "loss": 0.1823, "step": 3750 }, { "epoch": 0.5018731602890019, "grad_norm": 23.409542083740234, "learning_rate": 4.981268397109981e-07, "loss": 0.4968, "step": 3751 }, { "epoch": 0.502006957452502, "grad_norm": 28.517311096191406, "learning_rate": 4.97993042547498e-07, "loss": 0.4737, "step": 3752 }, { "epoch": 0.5021407546160022, "grad_norm": 23.862886428833008, "learning_rate": 4.978592453839978e-07, "loss": 0.4716, "step": 3753 }, { "epoch": 0.5022745517795023, "grad_norm": 35.04753112792969, "learning_rate": 4.977254482204977e-07, "loss": 0.4129, "step": 3754 }, { "epoch": 0.5024083489430025, "grad_norm": 30.794824600219727, "learning_rate": 4.975916510569976e-07, "loss": 0.6357, "step": 3755 }, { "epoch": 0.5025421461065025, "grad_norm": 19.665359497070312, "learning_rate": 4.974578538934975e-07, "loss": 0.4061, "step": 3756 }, { "epoch": 0.5026759432700026, "grad_norm": 23.09714698791504, "learning_rate": 4.973240567299972e-07, "loss": 0.487, "step": 3757 }, { "epoch": 0.5028097404335028, "grad_norm": 22.54517364501953, "learning_rate": 4.971902595664972e-07, "loss": 0.6009, "step": 3758 }, { "epoch": 0.5029435375970029, "grad_norm": 31.66373634338379, "learning_rate": 4.97056462402997e-07, "loss": 0.5361, "step": 3759 }, { "epoch": 0.5030773347605031, "grad_norm": 46.79549789428711, "learning_rate": 4.969226652394969e-07, "loss": 0.5876, "step": 3760 }, { "epoch": 0.5032111319240032, "grad_norm": 18.12544822692871, "learning_rate": 4.967888680759968e-07, "loss": 0.4945, "step": 3761 }, { "epoch": 0.5033449290875034, "grad_norm": 26.429157257080078, "learning_rate": 4.966550709124966e-07, "loss": 0.6753, "step": 3762 }, { "epoch": 0.5034787262510034, "grad_norm": 31.957904815673828, "learning_rate": 4.965212737489965e-07, "loss": 0.3591, "step": 3763 }, { "epoch": 0.5036125234145036, "grad_norm": 27.14683723449707, "learning_rate": 4.963874765854963e-07, "loss": 0.323, "step": 3764 }, { "epoch": 0.5037463205780037, "grad_norm": 14.84345531463623, "learning_rate": 4.962536794219963e-07, "loss": 0.2782, "step": 3765 }, { "epoch": 0.5038801177415039, "grad_norm": 25.208337783813477, "learning_rate": 4.96119882258496e-07, "loss": 0.5723, "step": 3766 }, { "epoch": 0.504013914905004, "grad_norm": 39.96840286254883, "learning_rate": 4.959860850949959e-07, "loss": 0.3897, "step": 3767 }, { "epoch": 0.5041477120685042, "grad_norm": 31.2127742767334, "learning_rate": 4.958522879314958e-07, "loss": 0.5257, "step": 3768 }, { "epoch": 0.5042815092320043, "grad_norm": 21.82288932800293, "learning_rate": 4.957184907679957e-07, "loss": 0.545, "step": 3769 }, { "epoch": 0.5044153063955045, "grad_norm": 17.40373992919922, "learning_rate": 4.955846936044956e-07, "loss": 0.5469, "step": 3770 }, { "epoch": 0.5045491035590045, "grad_norm": 24.328433990478516, "learning_rate": 4.954508964409954e-07, "loss": 0.4343, "step": 3771 }, { "epoch": 0.5046829007225047, "grad_norm": 16.65837287902832, "learning_rate": 4.953170992774953e-07, "loss": 0.4208, "step": 3772 }, { "epoch": 0.5048166978860048, "grad_norm": 12.738600730895996, "learning_rate": 4.951833021139952e-07, "loss": 0.4482, "step": 3773 }, { "epoch": 0.504950495049505, "grad_norm": 28.350671768188477, "learning_rate": 4.95049504950495e-07, "loss": 0.554, "step": 3774 }, { "epoch": 0.5050842922130051, "grad_norm": 30.193511962890625, "learning_rate": 4.949157077869949e-07, "loss": 0.4917, "step": 3775 }, { "epoch": 0.5052180893765053, "grad_norm": 18.077590942382812, "learning_rate": 4.947819106234947e-07, "loss": 0.3293, "step": 3776 }, { "epoch": 0.5053518865400054, "grad_norm": 25.45497703552246, "learning_rate": 4.946481134599946e-07, "loss": 0.4, "step": 3777 }, { "epoch": 0.5054856837035054, "grad_norm": 20.93058204650879, "learning_rate": 4.945143162964945e-07, "loss": 0.2174, "step": 3778 }, { "epoch": 0.5056194808670056, "grad_norm": 25.35149574279785, "learning_rate": 4.943805191329944e-07, "loss": 0.4133, "step": 3779 }, { "epoch": 0.5057532780305057, "grad_norm": 19.20042610168457, "learning_rate": 4.942467219694943e-07, "loss": 0.4895, "step": 3780 }, { "epoch": 0.5058870751940059, "grad_norm": 50.823978424072266, "learning_rate": 4.941129248059941e-07, "loss": 0.6796, "step": 3781 }, { "epoch": 0.506020872357506, "grad_norm": 33.587467193603516, "learning_rate": 4.939791276424939e-07, "loss": 0.5387, "step": 3782 }, { "epoch": 0.5061546695210062, "grad_norm": 21.12697410583496, "learning_rate": 4.938453304789939e-07, "loss": 0.369, "step": 3783 }, { "epoch": 0.5062884666845063, "grad_norm": 24.85024070739746, "learning_rate": 4.937115333154937e-07, "loss": 0.5544, "step": 3784 }, { "epoch": 0.5064222638480064, "grad_norm": 18.86993408203125, "learning_rate": 4.935777361519935e-07, "loss": 0.2192, "step": 3785 }, { "epoch": 0.5065560610115065, "grad_norm": 16.46368980407715, "learning_rate": 4.934439389884934e-07, "loss": 0.4413, "step": 3786 }, { "epoch": 0.5066898581750067, "grad_norm": 13.511467933654785, "learning_rate": 4.933101418249933e-07, "loss": 0.4045, "step": 3787 }, { "epoch": 0.5068236553385068, "grad_norm": 12.741935729980469, "learning_rate": 4.931763446614932e-07, "loss": 0.3728, "step": 3788 }, { "epoch": 0.506957452502007, "grad_norm": 32.645408630371094, "learning_rate": 4.93042547497993e-07, "loss": 0.6154, "step": 3789 }, { "epoch": 0.5070912496655071, "grad_norm": 19.002641677856445, "learning_rate": 4.929087503344928e-07, "loss": 0.2593, "step": 3790 }, { "epoch": 0.5072250468290073, "grad_norm": 24.6074161529541, "learning_rate": 4.927749531709927e-07, "loss": 0.5507, "step": 3791 }, { "epoch": 0.5073588439925074, "grad_norm": 17.581336975097656, "learning_rate": 4.926411560074926e-07, "loss": 0.3724, "step": 3792 }, { "epoch": 0.5074926411560075, "grad_norm": 19.9117374420166, "learning_rate": 4.925073588439925e-07, "loss": 0.4109, "step": 3793 }, { "epoch": 0.5076264383195076, "grad_norm": 17.877046585083008, "learning_rate": 4.923735616804924e-07, "loss": 0.2467, "step": 3794 }, { "epoch": 0.5077602354830077, "grad_norm": 18.05330467224121, "learning_rate": 4.922397645169922e-07, "loss": 0.3675, "step": 3795 }, { "epoch": 0.5078940326465079, "grad_norm": 24.286380767822266, "learning_rate": 4.921059673534921e-07, "loss": 0.4472, "step": 3796 }, { "epoch": 0.508027829810008, "grad_norm": 22.684627532958984, "learning_rate": 4.919721701899919e-07, "loss": 0.4992, "step": 3797 }, { "epoch": 0.5081616269735082, "grad_norm": 33.14812469482422, "learning_rate": 4.918383730264919e-07, "loss": 0.5693, "step": 3798 }, { "epoch": 0.5082954241370083, "grad_norm": 18.29520606994629, "learning_rate": 4.917045758629916e-07, "loss": 0.3686, "step": 3799 }, { "epoch": 0.5084292213005084, "grad_norm": 24.253387451171875, "learning_rate": 4.915707786994915e-07, "loss": 0.5898, "step": 3800 }, { "epoch": 0.5085630184640085, "grad_norm": 29.85413360595703, "learning_rate": 4.914369815359914e-07, "loss": 0.3408, "step": 3801 }, { "epoch": 0.5086968156275087, "grad_norm": 29.269760131835938, "learning_rate": 4.913031843724913e-07, "loss": 0.4642, "step": 3802 }, { "epoch": 0.5088306127910088, "grad_norm": 41.17617416381836, "learning_rate": 4.911693872089912e-07, "loss": 0.3584, "step": 3803 }, { "epoch": 0.508964409954509, "grad_norm": 34.63137435913086, "learning_rate": 4.91035590045491e-07, "loss": 0.6048, "step": 3804 }, { "epoch": 0.5090982071180091, "grad_norm": 26.49266815185547, "learning_rate": 4.909017928819909e-07, "loss": 0.6543, "step": 3805 }, { "epoch": 0.5092320042815093, "grad_norm": 28.60620880126953, "learning_rate": 4.907679957184908e-07, "loss": 0.4967, "step": 3806 }, { "epoch": 0.5093658014450093, "grad_norm": 19.431697845458984, "learning_rate": 4.906341985549906e-07, "loss": 0.2462, "step": 3807 }, { "epoch": 0.5094995986085095, "grad_norm": 13.483646392822266, "learning_rate": 4.905004013914905e-07, "loss": 0.3096, "step": 3808 }, { "epoch": 0.5096333957720096, "grad_norm": 20.39915657043457, "learning_rate": 4.903666042279903e-07, "loss": 0.4879, "step": 3809 }, { "epoch": 0.5097671929355098, "grad_norm": 22.732940673828125, "learning_rate": 4.902328070644902e-07, "loss": 0.4417, "step": 3810 }, { "epoch": 0.5099009900990099, "grad_norm": 29.376935958862305, "learning_rate": 4.900990099009901e-07, "loss": 0.5753, "step": 3811 }, { "epoch": 0.5100347872625101, "grad_norm": 19.06939125061035, "learning_rate": 4.8996521273749e-07, "loss": 0.2752, "step": 3812 }, { "epoch": 0.5101685844260102, "grad_norm": 21.199363708496094, "learning_rate": 4.898314155739898e-07, "loss": 0.3812, "step": 3813 }, { "epoch": 0.5103023815895104, "grad_norm": 20.515117645263672, "learning_rate": 4.896976184104897e-07, "loss": 0.4697, "step": 3814 }, { "epoch": 0.5104361787530104, "grad_norm": 19.54273796081543, "learning_rate": 4.895638212469895e-07, "loss": 0.5007, "step": 3815 }, { "epoch": 0.5105699759165105, "grad_norm": 17.066377639770508, "learning_rate": 4.894300240834895e-07, "loss": 0.4817, "step": 3816 }, { "epoch": 0.5107037730800107, "grad_norm": 20.500282287597656, "learning_rate": 4.892962269199893e-07, "loss": 0.4594, "step": 3817 }, { "epoch": 0.5108375702435108, "grad_norm": 29.09540367126465, "learning_rate": 4.891624297564891e-07, "loss": 0.4923, "step": 3818 }, { "epoch": 0.510971367407011, "grad_norm": 28.24904441833496, "learning_rate": 4.89028632592989e-07, "loss": 0.5171, "step": 3819 }, { "epoch": 0.5111051645705111, "grad_norm": 21.684181213378906, "learning_rate": 4.888948354294889e-07, "loss": 0.3765, "step": 3820 }, { "epoch": 0.5112389617340113, "grad_norm": 36.264068603515625, "learning_rate": 4.887610382659888e-07, "loss": 0.6143, "step": 3821 }, { "epoch": 0.5113727588975113, "grad_norm": 19.26374053955078, "learning_rate": 4.886272411024886e-07, "loss": 0.5024, "step": 3822 }, { "epoch": 0.5115065560610115, "grad_norm": 19.495838165283203, "learning_rate": 4.884934439389884e-07, "loss": 0.4653, "step": 3823 }, { "epoch": 0.5116403532245116, "grad_norm": 30.618122100830078, "learning_rate": 4.883596467754883e-07, "loss": 0.4814, "step": 3824 }, { "epoch": 0.5117741503880118, "grad_norm": 22.597246170043945, "learning_rate": 4.882258496119882e-07, "loss": 0.517, "step": 3825 }, { "epoch": 0.5119079475515119, "grad_norm": 26.363723754882812, "learning_rate": 4.880920524484881e-07, "loss": 0.4883, "step": 3826 }, { "epoch": 0.5120417447150121, "grad_norm": 16.17058563232422, "learning_rate": 4.879582552849879e-07, "loss": 0.3628, "step": 3827 }, { "epoch": 0.5121755418785122, "grad_norm": 22.655569076538086, "learning_rate": 4.878244581214878e-07, "loss": 0.2769, "step": 3828 }, { "epoch": 0.5123093390420123, "grad_norm": 16.17945671081543, "learning_rate": 4.876906609579877e-07, "loss": 0.2837, "step": 3829 }, { "epoch": 0.5124431362055124, "grad_norm": 16.229040145874023, "learning_rate": 4.875568637944875e-07, "loss": 0.3618, "step": 3830 }, { "epoch": 0.5125769333690126, "grad_norm": 30.523771286010742, "learning_rate": 4.874230666309875e-07, "loss": 0.3494, "step": 3831 }, { "epoch": 0.5127107305325127, "grad_norm": 30.234285354614258, "learning_rate": 4.872892694674872e-07, "loss": 0.3624, "step": 3832 }, { "epoch": 0.5128445276960129, "grad_norm": 18.918441772460938, "learning_rate": 4.871554723039871e-07, "loss": 0.3567, "step": 3833 }, { "epoch": 0.512978324859513, "grad_norm": 35.88613510131836, "learning_rate": 4.87021675140487e-07, "loss": 0.5298, "step": 3834 }, { "epoch": 0.5131121220230132, "grad_norm": 17.761552810668945, "learning_rate": 4.868878779769869e-07, "loss": 0.3332, "step": 3835 }, { "epoch": 0.5132459191865133, "grad_norm": 23.422958374023438, "learning_rate": 4.867540808134868e-07, "loss": 0.4572, "step": 3836 }, { "epoch": 0.5133797163500133, "grad_norm": 21.16748046875, "learning_rate": 4.866202836499866e-07, "loss": 0.4426, "step": 3837 }, { "epoch": 0.5135135135135135, "grad_norm": 32.58464813232422, "learning_rate": 4.864864864864865e-07, "loss": 0.4389, "step": 3838 }, { "epoch": 0.5136473106770136, "grad_norm": 43.258148193359375, "learning_rate": 4.863526893229864e-07, "loss": 0.6053, "step": 3839 }, { "epoch": 0.5137811078405138, "grad_norm": 12.422855377197266, "learning_rate": 4.862188921594862e-07, "loss": 0.2977, "step": 3840 }, { "epoch": 0.5139149050040139, "grad_norm": 51.747962951660156, "learning_rate": 4.86085094995986e-07, "loss": 0.4333, "step": 3841 }, { "epoch": 0.5140487021675141, "grad_norm": 21.419233322143555, "learning_rate": 4.859512978324859e-07, "loss": 0.288, "step": 3842 }, { "epoch": 0.5141824993310142, "grad_norm": 20.203784942626953, "learning_rate": 4.858175006689858e-07, "loss": 0.3744, "step": 3843 }, { "epoch": 0.5143162964945143, "grad_norm": 21.949237823486328, "learning_rate": 4.856837035054857e-07, "loss": 0.4748, "step": 3844 }, { "epoch": 0.5144500936580144, "grad_norm": 47.2047119140625, "learning_rate": 4.855499063419855e-07, "loss": 0.641, "step": 3845 }, { "epoch": 0.5145838908215146, "grad_norm": 26.40825653076172, "learning_rate": 4.854161091784854e-07, "loss": 0.6307, "step": 3846 }, { "epoch": 0.5147176879850147, "grad_norm": 21.362071990966797, "learning_rate": 4.852823120149852e-07, "loss": 0.344, "step": 3847 }, { "epoch": 0.5148514851485149, "grad_norm": 38.904422760009766, "learning_rate": 4.851485148514851e-07, "loss": 0.6845, "step": 3848 }, { "epoch": 0.514985282312015, "grad_norm": 25.34105682373047, "learning_rate": 4.85014717687985e-07, "loss": 0.4369, "step": 3849 }, { "epoch": 0.5151190794755152, "grad_norm": 21.24798583984375, "learning_rate": 4.848809205244849e-07, "loss": 0.5312, "step": 3850 }, { "epoch": 0.5152528766390152, "grad_norm": 14.127843856811523, "learning_rate": 4.847471233609847e-07, "loss": 0.3068, "step": 3851 }, { "epoch": 0.5153866738025153, "grad_norm": 26.549964904785156, "learning_rate": 4.846133261974846e-07, "loss": 0.4751, "step": 3852 }, { "epoch": 0.5155204709660155, "grad_norm": 22.688508987426758, "learning_rate": 4.844795290339845e-07, "loss": 0.4322, "step": 3853 }, { "epoch": 0.5156542681295156, "grad_norm": 50.162437438964844, "learning_rate": 4.843457318704844e-07, "loss": 0.3738, "step": 3854 }, { "epoch": 0.5157880652930158, "grad_norm": 23.484209060668945, "learning_rate": 4.842119347069841e-07, "loss": 0.5467, "step": 3855 }, { "epoch": 0.515921862456516, "grad_norm": 29.662086486816406, "learning_rate": 4.84078137543484e-07, "loss": 0.4459, "step": 3856 }, { "epoch": 0.5160556596200161, "grad_norm": 55.21876525878906, "learning_rate": 4.839443403799839e-07, "loss": 0.3349, "step": 3857 }, { "epoch": 0.5161894567835162, "grad_norm": 22.148448944091797, "learning_rate": 4.838105432164838e-07, "loss": 0.3469, "step": 3858 }, { "epoch": 0.5163232539470163, "grad_norm": 29.085424423217773, "learning_rate": 4.836767460529837e-07, "loss": 0.7354, "step": 3859 }, { "epoch": 0.5164570511105164, "grad_norm": 38.79560852050781, "learning_rate": 4.835429488894835e-07, "loss": 0.4064, "step": 3860 }, { "epoch": 0.5165908482740166, "grad_norm": 19.248550415039062, "learning_rate": 4.834091517259834e-07, "loss": 0.3676, "step": 3861 }, { "epoch": 0.5167246454375167, "grad_norm": 18.098817825317383, "learning_rate": 4.832753545624833e-07, "loss": 0.3374, "step": 3862 }, { "epoch": 0.5168584426010169, "grad_norm": 49.434593200683594, "learning_rate": 4.831415573989831e-07, "loss": 0.5494, "step": 3863 }, { "epoch": 0.516992239764517, "grad_norm": 15.841862678527832, "learning_rate": 4.830077602354831e-07, "loss": 0.2686, "step": 3864 }, { "epoch": 0.5171260369280172, "grad_norm": 23.453372955322266, "learning_rate": 4.828739630719828e-07, "loss": 0.3008, "step": 3865 }, { "epoch": 0.5172598340915172, "grad_norm": 65.3414077758789, "learning_rate": 4.827401659084827e-07, "loss": 0.8086, "step": 3866 }, { "epoch": 0.5173936312550174, "grad_norm": 14.873062133789062, "learning_rate": 4.826063687449826e-07, "loss": 0.2686, "step": 3867 }, { "epoch": 0.5175274284185175, "grad_norm": 23.63041114807129, "learning_rate": 4.824725715814825e-07, "loss": 0.6109, "step": 3868 }, { "epoch": 0.5176612255820177, "grad_norm": 18.852161407470703, "learning_rate": 4.823387744179823e-07, "loss": 0.5305, "step": 3869 }, { "epoch": 0.5177950227455178, "grad_norm": 18.82980728149414, "learning_rate": 4.822049772544821e-07, "loss": 0.3144, "step": 3870 }, { "epoch": 0.517928819909018, "grad_norm": 28.12152862548828, "learning_rate": 4.820711800909821e-07, "loss": 0.4986, "step": 3871 }, { "epoch": 0.5180626170725181, "grad_norm": 11.564746856689453, "learning_rate": 4.819373829274819e-07, "loss": 0.2759, "step": 3872 }, { "epoch": 0.5181964142360181, "grad_norm": 24.601951599121094, "learning_rate": 4.818035857639818e-07, "loss": 0.3351, "step": 3873 }, { "epoch": 0.5183302113995183, "grad_norm": 18.23526954650879, "learning_rate": 4.816697886004816e-07, "loss": 0.2477, "step": 3874 }, { "epoch": 0.5184640085630184, "grad_norm": 49.888729095458984, "learning_rate": 4.815359914369815e-07, "loss": 0.5701, "step": 3875 }, { "epoch": 0.5185978057265186, "grad_norm": 20.302961349487305, "learning_rate": 4.814021942734814e-07, "loss": 0.44, "step": 3876 }, { "epoch": 0.5187316028900187, "grad_norm": 23.00055694580078, "learning_rate": 4.812683971099813e-07, "loss": 0.3809, "step": 3877 }, { "epoch": 0.5188654000535189, "grad_norm": 25.24318504333496, "learning_rate": 4.81134599946481e-07, "loss": 0.4879, "step": 3878 }, { "epoch": 0.518999197217019, "grad_norm": 38.963279724121094, "learning_rate": 4.81000802782981e-07, "loss": 0.468, "step": 3879 }, { "epoch": 0.5191329943805192, "grad_norm": 18.916715621948242, "learning_rate": 4.808670056194808e-07, "loss": 0.4257, "step": 3880 }, { "epoch": 0.5192667915440192, "grad_norm": 17.761133193969727, "learning_rate": 4.807332084559807e-07, "loss": 0.3766, "step": 3881 }, { "epoch": 0.5194005887075194, "grad_norm": 32.638282775878906, "learning_rate": 4.805994112924806e-07, "loss": 0.4214, "step": 3882 }, { "epoch": 0.5195343858710195, "grad_norm": 26.486875534057617, "learning_rate": 4.804656141289804e-07, "loss": 0.5541, "step": 3883 }, { "epoch": 0.5196681830345197, "grad_norm": 22.92817497253418, "learning_rate": 4.803318169654803e-07, "loss": 0.4058, "step": 3884 }, { "epoch": 0.5198019801980198, "grad_norm": 14.478473663330078, "learning_rate": 4.801980198019802e-07, "loss": 0.3891, "step": 3885 }, { "epoch": 0.51993577736152, "grad_norm": 22.344465255737305, "learning_rate": 4.800642226384801e-07, "loss": 0.3358, "step": 3886 }, { "epoch": 0.5200695745250201, "grad_norm": 26.60009765625, "learning_rate": 4.7993042547498e-07, "loss": 0.3422, "step": 3887 }, { "epoch": 0.5202033716885202, "grad_norm": 14.619505882263184, "learning_rate": 4.797966283114797e-07, "loss": 0.3801, "step": 3888 }, { "epoch": 0.5203371688520203, "grad_norm": 23.19802474975586, "learning_rate": 4.796628311479796e-07, "loss": 0.4045, "step": 3889 }, { "epoch": 0.5204709660155205, "grad_norm": 22.81229019165039, "learning_rate": 4.795290339844795e-07, "loss": 0.5152, "step": 3890 }, { "epoch": 0.5206047631790206, "grad_norm": 38.193992614746094, "learning_rate": 4.793952368209794e-07, "loss": 0.5582, "step": 3891 }, { "epoch": 0.5207385603425208, "grad_norm": 22.856983184814453, "learning_rate": 4.792614396574793e-07, "loss": 0.486, "step": 3892 }, { "epoch": 0.5208723575060209, "grad_norm": 42.32805252075195, "learning_rate": 4.79127642493979e-07, "loss": 0.321, "step": 3893 }, { "epoch": 0.521006154669521, "grad_norm": 17.942974090576172, "learning_rate": 4.78993845330479e-07, "loss": 0.4297, "step": 3894 }, { "epoch": 0.5211399518330211, "grad_norm": 18.352672576904297, "learning_rate": 4.788600481669788e-07, "loss": 0.3894, "step": 3895 }, { "epoch": 0.5212737489965212, "grad_norm": 27.524614334106445, "learning_rate": 4.787262510034787e-07, "loss": 0.362, "step": 3896 }, { "epoch": 0.5214075461600214, "grad_norm": 16.72163963317871, "learning_rate": 4.785924538399785e-07, "loss": 0.2461, "step": 3897 }, { "epoch": 0.5215413433235215, "grad_norm": 15.275517463684082, "learning_rate": 4.784586566764784e-07, "loss": 0.4128, "step": 3898 }, { "epoch": 0.5216751404870217, "grad_norm": 23.88323974609375, "learning_rate": 4.783248595129783e-07, "loss": 0.4885, "step": 3899 }, { "epoch": 0.5218089376505218, "grad_norm": 26.832883834838867, "learning_rate": 4.781910623494782e-07, "loss": 0.4975, "step": 3900 }, { "epoch": 0.521942734814022, "grad_norm": 16.91481590270996, "learning_rate": 4.780572651859781e-07, "loss": 0.3588, "step": 3901 }, { "epoch": 0.5220765319775221, "grad_norm": 17.253400802612305, "learning_rate": 4.779234680224779e-07, "loss": 0.391, "step": 3902 }, { "epoch": 0.5222103291410222, "grad_norm": 58.382999420166016, "learning_rate": 4.777896708589777e-07, "loss": 0.4871, "step": 3903 }, { "epoch": 0.5223441263045223, "grad_norm": 23.210552215576172, "learning_rate": 4.776558736954776e-07, "loss": 0.318, "step": 3904 }, { "epoch": 0.5224779234680225, "grad_norm": 17.114990234375, "learning_rate": 4.775220765319775e-07, "loss": 0.426, "step": 3905 }, { "epoch": 0.5226117206315226, "grad_norm": 31.084678649902344, "learning_rate": 4.773882793684774e-07, "loss": 0.234, "step": 3906 }, { "epoch": 0.5227455177950228, "grad_norm": 19.358186721801758, "learning_rate": 4.772544822049772e-07, "loss": 0.3657, "step": 3907 }, { "epoch": 0.5228793149585229, "grad_norm": 28.884233474731445, "learning_rate": 4.771206850414771e-07, "loss": 0.4063, "step": 3908 }, { "epoch": 0.5230131121220231, "grad_norm": 26.591501235961914, "learning_rate": 4.76986887877977e-07, "loss": 0.4986, "step": 3909 }, { "epoch": 0.5231469092855231, "grad_norm": 23.62676239013672, "learning_rate": 4.768530907144769e-07, "loss": 0.3834, "step": 3910 }, { "epoch": 0.5232807064490232, "grad_norm": 19.20577621459961, "learning_rate": 4.767192935509767e-07, "loss": 0.2094, "step": 3911 }, { "epoch": 0.5234145036125234, "grad_norm": 24.463729858398438, "learning_rate": 4.7658549638747657e-07, "loss": 0.5023, "step": 3912 }, { "epoch": 0.5235483007760235, "grad_norm": 34.24100875854492, "learning_rate": 4.7645169922397645e-07, "loss": 0.464, "step": 3913 }, { "epoch": 0.5236820979395237, "grad_norm": 19.43393898010254, "learning_rate": 4.763179020604763e-07, "loss": 0.3671, "step": 3914 }, { "epoch": 0.5238158951030238, "grad_norm": 34.452117919921875, "learning_rate": 4.7618410489697616e-07, "loss": 0.4255, "step": 3915 }, { "epoch": 0.523949692266524, "grad_norm": 24.17858123779297, "learning_rate": 4.7605030773347604e-07, "loss": 0.3477, "step": 3916 }, { "epoch": 0.524083489430024, "grad_norm": 23.74098014831543, "learning_rate": 4.759165105699759e-07, "loss": 0.6168, "step": 3917 }, { "epoch": 0.5242172865935242, "grad_norm": 23.20895767211914, "learning_rate": 4.7578271340647574e-07, "loss": 0.3604, "step": 3918 }, { "epoch": 0.5243510837570243, "grad_norm": 24.069290161132812, "learning_rate": 4.756489162429757e-07, "loss": 0.4691, "step": 3919 }, { "epoch": 0.5244848809205245, "grad_norm": 16.51789093017578, "learning_rate": 4.755151190794755e-07, "loss": 0.2271, "step": 3920 }, { "epoch": 0.5246186780840246, "grad_norm": 18.215240478515625, "learning_rate": 4.7538132191597533e-07, "loss": 0.4649, "step": 3921 }, { "epoch": 0.5247524752475248, "grad_norm": 23.98566436767578, "learning_rate": 4.752475247524752e-07, "loss": 0.418, "step": 3922 }, { "epoch": 0.5248862724110249, "grad_norm": 16.944425582885742, "learning_rate": 4.751137275889751e-07, "loss": 0.2565, "step": 3923 }, { "epoch": 0.5250200695745251, "grad_norm": 18.031307220458984, "learning_rate": 4.7497993042547497e-07, "loss": 0.3519, "step": 3924 }, { "epoch": 0.5251538667380251, "grad_norm": 26.20342254638672, "learning_rate": 4.748461332619748e-07, "loss": 0.6029, "step": 3925 }, { "epoch": 0.5252876639015253, "grad_norm": 23.596904754638672, "learning_rate": 4.7471233609847467e-07, "loss": 0.3972, "step": 3926 }, { "epoch": 0.5254214610650254, "grad_norm": 70.81377410888672, "learning_rate": 4.7457853893497455e-07, "loss": 0.6176, "step": 3927 }, { "epoch": 0.5255552582285256, "grad_norm": 22.187625885009766, "learning_rate": 4.7444474177147443e-07, "loss": 0.2804, "step": 3928 }, { "epoch": 0.5256890553920257, "grad_norm": 27.29764175415039, "learning_rate": 4.7431094460797426e-07, "loss": 0.511, "step": 3929 }, { "epoch": 0.5258228525555259, "grad_norm": 16.0130558013916, "learning_rate": 4.741771474444742e-07, "loss": 0.2945, "step": 3930 }, { "epoch": 0.525956649719026, "grad_norm": 13.64741325378418, "learning_rate": 4.74043350280974e-07, "loss": 0.4333, "step": 3931 }, { "epoch": 0.526090446882526, "grad_norm": 17.349531173706055, "learning_rate": 4.739095531174739e-07, "loss": 0.411, "step": 3932 }, { "epoch": 0.5262242440460262, "grad_norm": 26.063480377197266, "learning_rate": 4.737757559539737e-07, "loss": 0.216, "step": 3933 }, { "epoch": 0.5263580412095263, "grad_norm": 18.006834030151367, "learning_rate": 4.7364195879047365e-07, "loss": 0.3266, "step": 3934 }, { "epoch": 0.5264918383730265, "grad_norm": 23.029747009277344, "learning_rate": 4.735081616269735e-07, "loss": 0.3897, "step": 3935 }, { "epoch": 0.5266256355365266, "grad_norm": 20.15803337097168, "learning_rate": 4.7337436446347336e-07, "loss": 0.5225, "step": 3936 }, { "epoch": 0.5267594327000268, "grad_norm": 29.10502815246582, "learning_rate": 4.732405672999732e-07, "loss": 0.6849, "step": 3937 }, { "epoch": 0.5268932298635269, "grad_norm": 31.628273010253906, "learning_rate": 4.731067701364731e-07, "loss": 0.3721, "step": 3938 }, { "epoch": 0.527027027027027, "grad_norm": 19.56451416015625, "learning_rate": 4.7297297297297294e-07, "loss": 0.3236, "step": 3939 }, { "epoch": 0.5271608241905271, "grad_norm": 52.22503662109375, "learning_rate": 4.728391758094728e-07, "loss": 0.3644, "step": 3940 }, { "epoch": 0.5272946213540273, "grad_norm": 23.90501594543457, "learning_rate": 4.727053786459727e-07, "loss": 0.4302, "step": 3941 }, { "epoch": 0.5274284185175274, "grad_norm": 23.47974395751953, "learning_rate": 4.725715814824726e-07, "loss": 0.5408, "step": 3942 }, { "epoch": 0.5275622156810276, "grad_norm": 28.76251983642578, "learning_rate": 4.724377843189724e-07, "loss": 0.648, "step": 3943 }, { "epoch": 0.5276960128445277, "grad_norm": 16.45423126220703, "learning_rate": 4.723039871554723e-07, "loss": 0.3931, "step": 3944 }, { "epoch": 0.5278298100080279, "grad_norm": 23.817846298217773, "learning_rate": 4.7217018999197217e-07, "loss": 0.5243, "step": 3945 }, { "epoch": 0.527963607171528, "grad_norm": 19.174880981445312, "learning_rate": 4.7203639282847204e-07, "loss": 0.3089, "step": 3946 }, { "epoch": 0.528097404335028, "grad_norm": 26.53986358642578, "learning_rate": 4.7190259566497187e-07, "loss": 0.5528, "step": 3947 }, { "epoch": 0.5282312014985282, "grad_norm": 27.65624237060547, "learning_rate": 4.717687985014717e-07, "loss": 0.3356, "step": 3948 }, { "epoch": 0.5283649986620284, "grad_norm": 27.384920120239258, "learning_rate": 4.7163500133797163e-07, "loss": 0.4148, "step": 3949 }, { "epoch": 0.5284987958255285, "grad_norm": 25.43934440612793, "learning_rate": 4.7150120417447146e-07, "loss": 0.434, "step": 3950 }, { "epoch": 0.5286325929890286, "grad_norm": 37.629966735839844, "learning_rate": 4.7136740701097134e-07, "loss": 0.389, "step": 3951 }, { "epoch": 0.5287663901525288, "grad_norm": 28.77340316772461, "learning_rate": 4.712336098474712e-07, "loss": 0.5566, "step": 3952 }, { "epoch": 0.528900187316029, "grad_norm": 26.353763580322266, "learning_rate": 4.710998126839711e-07, "loss": 0.3499, "step": 3953 }, { "epoch": 0.529033984479529, "grad_norm": 16.299524307250977, "learning_rate": 4.709660155204709e-07, "loss": 0.3692, "step": 3954 }, { "epoch": 0.5291677816430291, "grad_norm": 18.498876571655273, "learning_rate": 4.708322183569708e-07, "loss": 0.3094, "step": 3955 }, { "epoch": 0.5293015788065293, "grad_norm": 81.37066650390625, "learning_rate": 4.706984211934707e-07, "loss": 0.9334, "step": 3956 }, { "epoch": 0.5294353759700294, "grad_norm": 24.79694938659668, "learning_rate": 4.7056462402997056e-07, "loss": 0.2201, "step": 3957 }, { "epoch": 0.5295691731335296, "grad_norm": 14.290066719055176, "learning_rate": 4.704308268664704e-07, "loss": 0.1858, "step": 3958 }, { "epoch": 0.5297029702970297, "grad_norm": 19.09716796875, "learning_rate": 4.7029702970297026e-07, "loss": 0.3436, "step": 3959 }, { "epoch": 0.5298367674605299, "grad_norm": 28.23158073425293, "learning_rate": 4.7016323253947014e-07, "loss": 0.4162, "step": 3960 }, { "epoch": 0.5299705646240299, "grad_norm": 45.460548400878906, "learning_rate": 4.7002943537597e-07, "loss": 0.6654, "step": 3961 }, { "epoch": 0.5301043617875301, "grad_norm": 26.635841369628906, "learning_rate": 4.6989563821246985e-07, "loss": 0.4643, "step": 3962 }, { "epoch": 0.5302381589510302, "grad_norm": 27.78917694091797, "learning_rate": 4.697618410489698e-07, "loss": 0.553, "step": 3963 }, { "epoch": 0.5303719561145304, "grad_norm": 15.051085472106934, "learning_rate": 4.696280438854696e-07, "loss": 0.3705, "step": 3964 }, { "epoch": 0.5305057532780305, "grad_norm": 20.185150146484375, "learning_rate": 4.694942467219695e-07, "loss": 0.4305, "step": 3965 }, { "epoch": 0.5306395504415307, "grad_norm": 17.46774673461914, "learning_rate": 4.693604495584693e-07, "loss": 0.3535, "step": 3966 }, { "epoch": 0.5307733476050308, "grad_norm": 43.934452056884766, "learning_rate": 4.6922665239496924e-07, "loss": 0.717, "step": 3967 }, { "epoch": 0.530907144768531, "grad_norm": 23.426950454711914, "learning_rate": 4.6909285523146907e-07, "loss": 0.3729, "step": 3968 }, { "epoch": 0.531040941932031, "grad_norm": 17.51937484741211, "learning_rate": 4.6895905806796895e-07, "loss": 0.4077, "step": 3969 }, { "epoch": 0.5311747390955311, "grad_norm": 13.281532287597656, "learning_rate": 4.688252609044688e-07, "loss": 0.3541, "step": 3970 }, { "epoch": 0.5313085362590313, "grad_norm": 26.698997497558594, "learning_rate": 4.686914637409687e-07, "loss": 0.4401, "step": 3971 }, { "epoch": 0.5314423334225314, "grad_norm": 27.12602424621582, "learning_rate": 4.6855766657746854e-07, "loss": 0.2501, "step": 3972 }, { "epoch": 0.5315761305860316, "grad_norm": 20.829357147216797, "learning_rate": 4.6842386941396836e-07, "loss": 0.5282, "step": 3973 }, { "epoch": 0.5317099277495317, "grad_norm": 28.49848747253418, "learning_rate": 4.682900722504683e-07, "loss": 0.3572, "step": 3974 }, { "epoch": 0.5318437249130319, "grad_norm": 21.202381134033203, "learning_rate": 4.681562750869681e-07, "loss": 0.4263, "step": 3975 }, { "epoch": 0.5319775220765319, "grad_norm": 62.22829055786133, "learning_rate": 4.68022477923468e-07, "loss": 0.7568, "step": 3976 }, { "epoch": 0.5321113192400321, "grad_norm": 35.472312927246094, "learning_rate": 4.678886807599678e-07, "loss": 0.5461, "step": 3977 }, { "epoch": 0.5322451164035322, "grad_norm": 13.66837215423584, "learning_rate": 4.6775488359646776e-07, "loss": 0.3015, "step": 3978 }, { "epoch": 0.5323789135670324, "grad_norm": 29.433225631713867, "learning_rate": 4.676210864329676e-07, "loss": 0.4912, "step": 3979 }, { "epoch": 0.5325127107305325, "grad_norm": 19.504980087280273, "learning_rate": 4.6748728926946746e-07, "loss": 0.2977, "step": 3980 }, { "epoch": 0.5326465078940327, "grad_norm": 20.070491790771484, "learning_rate": 4.673534921059673e-07, "loss": 0.5356, "step": 3981 }, { "epoch": 0.5327803050575328, "grad_norm": 14.26370906829834, "learning_rate": 4.672196949424672e-07, "loss": 0.295, "step": 3982 }, { "epoch": 0.5329141022210329, "grad_norm": 22.021669387817383, "learning_rate": 4.6708589777896705e-07, "loss": 0.4119, "step": 3983 }, { "epoch": 0.533047899384533, "grad_norm": 23.416217803955078, "learning_rate": 4.6695210061546693e-07, "loss": 0.5888, "step": 3984 }, { "epoch": 0.5331816965480332, "grad_norm": 21.347509384155273, "learning_rate": 4.668183034519668e-07, "loss": 0.382, "step": 3985 }, { "epoch": 0.5333154937115333, "grad_norm": 24.427812576293945, "learning_rate": 4.666845062884667e-07, "loss": 0.4158, "step": 3986 }, { "epoch": 0.5334492908750335, "grad_norm": 18.365373611450195, "learning_rate": 4.665507091249665e-07, "loss": 0.4464, "step": 3987 }, { "epoch": 0.5335830880385336, "grad_norm": 24.831363677978516, "learning_rate": 4.664169119614664e-07, "loss": 0.2899, "step": 3988 }, { "epoch": 0.5337168852020338, "grad_norm": 40.61035919189453, "learning_rate": 4.6628311479796627e-07, "loss": 0.299, "step": 3989 }, { "epoch": 0.5338506823655339, "grad_norm": 52.55266189575195, "learning_rate": 4.6614931763446615e-07, "loss": 0.6152, "step": 3990 }, { "epoch": 0.5339844795290339, "grad_norm": 27.41896629333496, "learning_rate": 4.66015520470966e-07, "loss": 0.4831, "step": 3991 }, { "epoch": 0.5341182766925341, "grad_norm": 32.40919494628906, "learning_rate": 4.6588172330746586e-07, "loss": 0.3477, "step": 3992 }, { "epoch": 0.5342520738560342, "grad_norm": 18.35667610168457, "learning_rate": 4.6574792614396574e-07, "loss": 0.3551, "step": 3993 }, { "epoch": 0.5343858710195344, "grad_norm": 19.830371856689453, "learning_rate": 4.656141289804656e-07, "loss": 0.2658, "step": 3994 }, { "epoch": 0.5345196681830345, "grad_norm": 17.47930335998535, "learning_rate": 4.6548033181696544e-07, "loss": 0.5341, "step": 3995 }, { "epoch": 0.5346534653465347, "grad_norm": 17.270689010620117, "learning_rate": 4.6534653465346537e-07, "loss": 0.3964, "step": 3996 }, { "epoch": 0.5347872625100348, "grad_norm": 24.87159538269043, "learning_rate": 4.652127374899652e-07, "loss": 0.7774, "step": 3997 }, { "epoch": 0.5349210596735349, "grad_norm": 30.722572326660156, "learning_rate": 4.650789403264651e-07, "loss": 0.5576, "step": 3998 }, { "epoch": 0.535054856837035, "grad_norm": 19.711650848388672, "learning_rate": 4.649451431629649e-07, "loss": 0.4864, "step": 3999 }, { "epoch": 0.5351886540005352, "grad_norm": 32.312259674072266, "learning_rate": 4.648113459994648e-07, "loss": 0.4349, "step": 4000 }, { "epoch": 0.5353224511640353, "grad_norm": 32.1274299621582, "learning_rate": 4.6467754883596466e-07, "loss": 0.4079, "step": 4001 }, { "epoch": 0.5354562483275355, "grad_norm": 23.22428321838379, "learning_rate": 4.645437516724645e-07, "loss": 0.6083, "step": 4002 }, { "epoch": 0.5355900454910356, "grad_norm": 24.90790557861328, "learning_rate": 4.6440995450896437e-07, "loss": 0.3637, "step": 4003 }, { "epoch": 0.5357238426545358, "grad_norm": 29.851411819458008, "learning_rate": 4.6427615734546425e-07, "loss": 0.3372, "step": 4004 }, { "epoch": 0.5358576398180358, "grad_norm": 14.934528350830078, "learning_rate": 4.6414236018196413e-07, "loss": 0.3659, "step": 4005 }, { "epoch": 0.535991436981536, "grad_norm": 29.97824478149414, "learning_rate": 4.6400856301846395e-07, "loss": 0.5444, "step": 4006 }, { "epoch": 0.5361252341450361, "grad_norm": 20.609891891479492, "learning_rate": 4.638747658549639e-07, "loss": 0.6319, "step": 4007 }, { "epoch": 0.5362590313085362, "grad_norm": 25.660762786865234, "learning_rate": 4.637409686914637e-07, "loss": 0.3987, "step": 4008 }, { "epoch": 0.5363928284720364, "grad_norm": 29.582639694213867, "learning_rate": 4.636071715279636e-07, "loss": 0.5845, "step": 4009 }, { "epoch": 0.5365266256355365, "grad_norm": 21.881799697875977, "learning_rate": 4.634733743644634e-07, "loss": 0.2394, "step": 4010 }, { "epoch": 0.5366604227990367, "grad_norm": 31.95891571044922, "learning_rate": 4.6333957720096335e-07, "loss": 0.3946, "step": 4011 }, { "epoch": 0.5367942199625368, "grad_norm": 21.702112197875977, "learning_rate": 4.632057800374632e-07, "loss": 0.3982, "step": 4012 }, { "epoch": 0.5369280171260369, "grad_norm": 24.129436492919922, "learning_rate": 4.6307198287396306e-07, "loss": 0.3223, "step": 4013 }, { "epoch": 0.537061814289537, "grad_norm": 24.109031677246094, "learning_rate": 4.629381857104629e-07, "loss": 0.3542, "step": 4014 }, { "epoch": 0.5371956114530372, "grad_norm": 39.89303970336914, "learning_rate": 4.628043885469628e-07, "loss": 0.7119, "step": 4015 }, { "epoch": 0.5373294086165373, "grad_norm": 14.89996337890625, "learning_rate": 4.6267059138346264e-07, "loss": 0.2687, "step": 4016 }, { "epoch": 0.5374632057800375, "grad_norm": 20.48587989807129, "learning_rate": 4.625367942199625e-07, "loss": 0.4204, "step": 4017 }, { "epoch": 0.5375970029435376, "grad_norm": 15.08414077758789, "learning_rate": 4.624029970564624e-07, "loss": 0.4072, "step": 4018 }, { "epoch": 0.5377308001070378, "grad_norm": 23.567718505859375, "learning_rate": 4.622691998929623e-07, "loss": 0.4272, "step": 4019 }, { "epoch": 0.5378645972705378, "grad_norm": 34.1723518371582, "learning_rate": 4.621354027294621e-07, "loss": 0.4266, "step": 4020 }, { "epoch": 0.537998394434038, "grad_norm": 13.671578407287598, "learning_rate": 4.62001605565962e-07, "loss": 0.3634, "step": 4021 }, { "epoch": 0.5381321915975381, "grad_norm": 15.00645923614502, "learning_rate": 4.6186780840246186e-07, "loss": 0.3785, "step": 4022 }, { "epoch": 0.5382659887610383, "grad_norm": 26.719173431396484, "learning_rate": 4.6173401123896174e-07, "loss": 0.453, "step": 4023 }, { "epoch": 0.5383997859245384, "grad_norm": 31.575336456298828, "learning_rate": 4.6160021407546157e-07, "loss": 0.5438, "step": 4024 }, { "epoch": 0.5385335830880386, "grad_norm": 15.828365325927734, "learning_rate": 4.614664169119614e-07, "loss": 0.2079, "step": 4025 }, { "epoch": 0.5386673802515387, "grad_norm": 24.242755889892578, "learning_rate": 4.6133261974846133e-07, "loss": 0.3596, "step": 4026 }, { "epoch": 0.5388011774150389, "grad_norm": 18.745115280151367, "learning_rate": 4.6119882258496115e-07, "loss": 0.454, "step": 4027 }, { "epoch": 0.5389349745785389, "grad_norm": 21.877159118652344, "learning_rate": 4.6106502542146103e-07, "loss": 0.3521, "step": 4028 }, { "epoch": 0.539068771742039, "grad_norm": 32.006141662597656, "learning_rate": 4.609312282579609e-07, "loss": 0.4417, "step": 4029 }, { "epoch": 0.5392025689055392, "grad_norm": 20.534210205078125, "learning_rate": 4.607974310944608e-07, "loss": 0.4498, "step": 4030 }, { "epoch": 0.5393363660690393, "grad_norm": 33.66996383666992, "learning_rate": 4.606636339309606e-07, "loss": 0.5028, "step": 4031 }, { "epoch": 0.5394701632325395, "grad_norm": 16.664813995361328, "learning_rate": 4.605298367674605e-07, "loss": 0.4433, "step": 4032 }, { "epoch": 0.5396039603960396, "grad_norm": 18.981098175048828, "learning_rate": 4.603960396039604e-07, "loss": 0.371, "step": 4033 }, { "epoch": 0.5397377575595398, "grad_norm": 24.494247436523438, "learning_rate": 4.6026224244046026e-07, "loss": 0.3881, "step": 4034 }, { "epoch": 0.5398715547230398, "grad_norm": 21.221668243408203, "learning_rate": 4.601284452769601e-07, "loss": 0.4785, "step": 4035 }, { "epoch": 0.54000535188654, "grad_norm": 23.271881103515625, "learning_rate": 4.5999464811345996e-07, "loss": 0.4088, "step": 4036 }, { "epoch": 0.5401391490500401, "grad_norm": 16.137060165405273, "learning_rate": 4.5986085094995984e-07, "loss": 0.2762, "step": 4037 }, { "epoch": 0.5402729462135403, "grad_norm": 17.459047317504883, "learning_rate": 4.597270537864597e-07, "loss": 0.245, "step": 4038 }, { "epoch": 0.5404067433770404, "grad_norm": 19.826202392578125, "learning_rate": 4.5959325662295955e-07, "loss": 0.4449, "step": 4039 }, { "epoch": 0.5405405405405406, "grad_norm": 33.62546157836914, "learning_rate": 4.594594594594595e-07, "loss": 0.4409, "step": 4040 }, { "epoch": 0.5406743377040407, "grad_norm": 15.156424522399902, "learning_rate": 4.593256622959593e-07, "loss": 0.4231, "step": 4041 }, { "epoch": 0.5408081348675408, "grad_norm": 20.67551612854004, "learning_rate": 4.591918651324592e-07, "loss": 0.5872, "step": 4042 }, { "epoch": 0.5409419320310409, "grad_norm": 36.48623275756836, "learning_rate": 4.59058067968959e-07, "loss": 0.5879, "step": 4043 }, { "epoch": 0.541075729194541, "grad_norm": 29.583152770996094, "learning_rate": 4.5892427080545894e-07, "loss": 0.6629, "step": 4044 }, { "epoch": 0.5412095263580412, "grad_norm": 52.00901412963867, "learning_rate": 4.5879047364195877e-07, "loss": 0.3359, "step": 4045 }, { "epoch": 0.5413433235215414, "grad_norm": 30.787643432617188, "learning_rate": 4.5865667647845865e-07, "loss": 0.3299, "step": 4046 }, { "epoch": 0.5414771206850415, "grad_norm": 35.82624435424805, "learning_rate": 4.585228793149585e-07, "loss": 0.3329, "step": 4047 }, { "epoch": 0.5416109178485417, "grad_norm": 17.36993408203125, "learning_rate": 4.583890821514584e-07, "loss": 0.4916, "step": 4048 }, { "epoch": 0.5417447150120418, "grad_norm": 24.747934341430664, "learning_rate": 4.5825528498795823e-07, "loss": 0.57, "step": 4049 }, { "epoch": 0.5418785121755418, "grad_norm": 33.462257385253906, "learning_rate": 4.5812148782445806e-07, "loss": 0.4578, "step": 4050 }, { "epoch": 0.542012309339042, "grad_norm": 33.896793365478516, "learning_rate": 4.57987690660958e-07, "loss": 0.4851, "step": 4051 }, { "epoch": 0.5421461065025421, "grad_norm": 20.890090942382812, "learning_rate": 4.578538934974578e-07, "loss": 0.4736, "step": 4052 }, { "epoch": 0.5422799036660423, "grad_norm": 20.655654907226562, "learning_rate": 4.577200963339577e-07, "loss": 0.3169, "step": 4053 }, { "epoch": 0.5424137008295424, "grad_norm": 15.7335786819458, "learning_rate": 4.575862991704575e-07, "loss": 0.4577, "step": 4054 }, { "epoch": 0.5425474979930426, "grad_norm": 16.78856658935547, "learning_rate": 4.5745250200695746e-07, "loss": 0.2898, "step": 4055 }, { "epoch": 0.5426812951565427, "grad_norm": 30.67220115661621, "learning_rate": 4.573187048434573e-07, "loss": 0.829, "step": 4056 }, { "epoch": 0.5428150923200428, "grad_norm": 18.975250244140625, "learning_rate": 4.5718490767995716e-07, "loss": 0.2249, "step": 4057 }, { "epoch": 0.5429488894835429, "grad_norm": 21.073383331298828, "learning_rate": 4.57051110516457e-07, "loss": 0.3908, "step": 4058 }, { "epoch": 0.5430826866470431, "grad_norm": 40.17736053466797, "learning_rate": 4.569173133529569e-07, "loss": 0.5562, "step": 4059 }, { "epoch": 0.5432164838105432, "grad_norm": 18.05917739868164, "learning_rate": 4.5678351618945675e-07, "loss": 0.4636, "step": 4060 }, { "epoch": 0.5433502809740434, "grad_norm": 22.449073791503906, "learning_rate": 4.566497190259566e-07, "loss": 0.391, "step": 4061 }, { "epoch": 0.5434840781375435, "grad_norm": 20.240846633911133, "learning_rate": 4.565159218624565e-07, "loss": 0.6152, "step": 4062 }, { "epoch": 0.5436178753010437, "grad_norm": 26.92072296142578, "learning_rate": 4.563821246989564e-07, "loss": 0.2325, "step": 4063 }, { "epoch": 0.5437516724645437, "grad_norm": 30.962810516357422, "learning_rate": 4.562483275354562e-07, "loss": 0.5641, "step": 4064 }, { "epoch": 0.5438854696280438, "grad_norm": 19.087238311767578, "learning_rate": 4.561145303719561e-07, "loss": 0.4149, "step": 4065 }, { "epoch": 0.544019266791544, "grad_norm": 13.488123893737793, "learning_rate": 4.5598073320845597e-07, "loss": 0.2853, "step": 4066 }, { "epoch": 0.5441530639550441, "grad_norm": 27.425418853759766, "learning_rate": 4.5584693604495585e-07, "loss": 0.551, "step": 4067 }, { "epoch": 0.5442868611185443, "grad_norm": 23.99152183532715, "learning_rate": 4.557131388814557e-07, "loss": 0.4586, "step": 4068 }, { "epoch": 0.5444206582820444, "grad_norm": 16.942325592041016, "learning_rate": 4.5557934171795555e-07, "loss": 0.2863, "step": 4069 }, { "epoch": 0.5445544554455446, "grad_norm": 18.77644157409668, "learning_rate": 4.5544554455445543e-07, "loss": 0.3787, "step": 4070 }, { "epoch": 0.5446882526090447, "grad_norm": 22.147064208984375, "learning_rate": 4.553117473909553e-07, "loss": 0.2571, "step": 4071 }, { "epoch": 0.5448220497725448, "grad_norm": 18.633386611938477, "learning_rate": 4.5517795022745514e-07, "loss": 0.357, "step": 4072 }, { "epoch": 0.5449558469360449, "grad_norm": 17.122575759887695, "learning_rate": 4.5504415306395507e-07, "loss": 0.48, "step": 4073 }, { "epoch": 0.5450896440995451, "grad_norm": 24.45584487915039, "learning_rate": 4.549103559004549e-07, "loss": 0.4011, "step": 4074 }, { "epoch": 0.5452234412630452, "grad_norm": 16.981460571289062, "learning_rate": 4.547765587369548e-07, "loss": 0.4, "step": 4075 }, { "epoch": 0.5453572384265454, "grad_norm": 20.103904724121094, "learning_rate": 4.546427615734546e-07, "loss": 0.3777, "step": 4076 }, { "epoch": 0.5454910355900455, "grad_norm": 41.3303337097168, "learning_rate": 4.5450896440995454e-07, "loss": 0.5589, "step": 4077 }, { "epoch": 0.5456248327535457, "grad_norm": 37.28273010253906, "learning_rate": 4.5437516724645436e-07, "loss": 0.5786, "step": 4078 }, { "epoch": 0.5457586299170457, "grad_norm": 27.126386642456055, "learning_rate": 4.542413700829542e-07, "loss": 0.2998, "step": 4079 }, { "epoch": 0.5458924270805459, "grad_norm": 20.851409912109375, "learning_rate": 4.5410757291945407e-07, "loss": 0.4949, "step": 4080 }, { "epoch": 0.546026224244046, "grad_norm": 24.41214942932129, "learning_rate": 4.5397377575595395e-07, "loss": 0.3999, "step": 4081 }, { "epoch": 0.5461600214075462, "grad_norm": 13.806890487670898, "learning_rate": 4.538399785924538e-07, "loss": 0.3225, "step": 4082 }, { "epoch": 0.5462938185710463, "grad_norm": 19.87745475769043, "learning_rate": 4.5370618142895365e-07, "loss": 0.4785, "step": 4083 }, { "epoch": 0.5464276157345465, "grad_norm": 28.183666229248047, "learning_rate": 4.535723842654536e-07, "loss": 0.48, "step": 4084 }, { "epoch": 0.5465614128980466, "grad_norm": 33.315757751464844, "learning_rate": 4.534385871019534e-07, "loss": 0.6492, "step": 4085 }, { "epoch": 0.5466952100615466, "grad_norm": 25.46073341369629, "learning_rate": 4.533047899384533e-07, "loss": 0.4124, "step": 4086 }, { "epoch": 0.5468290072250468, "grad_norm": 29.9565486907959, "learning_rate": 4.531709927749531e-07, "loss": 0.4229, "step": 4087 }, { "epoch": 0.5469628043885469, "grad_norm": 21.236207962036133, "learning_rate": 4.5303719561145305e-07, "loss": 0.4128, "step": 4088 }, { "epoch": 0.5470966015520471, "grad_norm": 22.39043617248535, "learning_rate": 4.529033984479529e-07, "loss": 0.4323, "step": 4089 }, { "epoch": 0.5472303987155472, "grad_norm": 29.686328887939453, "learning_rate": 4.5276960128445275e-07, "loss": 0.5555, "step": 4090 }, { "epoch": 0.5473641958790474, "grad_norm": 15.437530517578125, "learning_rate": 4.526358041209526e-07, "loss": 0.3937, "step": 4091 }, { "epoch": 0.5474979930425475, "grad_norm": 29.81545639038086, "learning_rate": 4.525020069574525e-07, "loss": 0.3657, "step": 4092 }, { "epoch": 0.5476317902060477, "grad_norm": 22.852718353271484, "learning_rate": 4.5236820979395234e-07, "loss": 0.4808, "step": 4093 }, { "epoch": 0.5477655873695477, "grad_norm": 18.348543167114258, "learning_rate": 4.522344126304522e-07, "loss": 0.3758, "step": 4094 }, { "epoch": 0.5478993845330479, "grad_norm": 16.926435470581055, "learning_rate": 4.521006154669521e-07, "loss": 0.4956, "step": 4095 }, { "epoch": 0.548033181696548, "grad_norm": 21.613054275512695, "learning_rate": 4.51966818303452e-07, "loss": 0.2409, "step": 4096 }, { "epoch": 0.5481669788600482, "grad_norm": 18.28876304626465, "learning_rate": 4.518330211399518e-07, "loss": 0.2758, "step": 4097 }, { "epoch": 0.5483007760235483, "grad_norm": 18.508960723876953, "learning_rate": 4.516992239764517e-07, "loss": 0.4494, "step": 4098 }, { "epoch": 0.5484345731870485, "grad_norm": 19.07659339904785, "learning_rate": 4.5156542681295156e-07, "loss": 0.4143, "step": 4099 }, { "epoch": 0.5485683703505486, "grad_norm": 26.117300033569336, "learning_rate": 4.5143162964945144e-07, "loss": 0.4347, "step": 4100 }, { "epoch": 0.5487021675140487, "grad_norm": 28.460458755493164, "learning_rate": 4.5129783248595127e-07, "loss": 0.4677, "step": 4101 }, { "epoch": 0.5488359646775488, "grad_norm": 17.822065353393555, "learning_rate": 4.511640353224511e-07, "loss": 0.4106, "step": 4102 }, { "epoch": 0.548969761841049, "grad_norm": 13.31904125213623, "learning_rate": 4.51030238158951e-07, "loss": 0.3117, "step": 4103 }, { "epoch": 0.5491035590045491, "grad_norm": 21.07779884338379, "learning_rate": 4.5089644099545085e-07, "loss": 0.5865, "step": 4104 }, { "epoch": 0.5492373561680493, "grad_norm": 16.155014038085938, "learning_rate": 4.5076264383195073e-07, "loss": 0.3048, "step": 4105 }, { "epoch": 0.5493711533315494, "grad_norm": 25.141963958740234, "learning_rate": 4.506288466684506e-07, "loss": 0.2999, "step": 4106 }, { "epoch": 0.5495049504950495, "grad_norm": 21.00425910949707, "learning_rate": 4.504950495049505e-07, "loss": 0.2717, "step": 4107 }, { "epoch": 0.5496387476585496, "grad_norm": 17.598844528198242, "learning_rate": 4.503612523414503e-07, "loss": 0.5045, "step": 4108 }, { "epoch": 0.5497725448220497, "grad_norm": 48.02562713623047, "learning_rate": 4.502274551779502e-07, "loss": 0.6842, "step": 4109 }, { "epoch": 0.5499063419855499, "grad_norm": 24.471250534057617, "learning_rate": 4.500936580144501e-07, "loss": 0.4948, "step": 4110 }, { "epoch": 0.55004013914905, "grad_norm": 15.790811538696289, "learning_rate": 4.4995986085094995e-07, "loss": 0.1879, "step": 4111 }, { "epoch": 0.5501739363125502, "grad_norm": 24.897605895996094, "learning_rate": 4.498260636874498e-07, "loss": 0.5419, "step": 4112 }, { "epoch": 0.5503077334760503, "grad_norm": 22.046892166137695, "learning_rate": 4.4969226652394966e-07, "loss": 0.4315, "step": 4113 }, { "epoch": 0.5504415306395505, "grad_norm": 24.064302444458008, "learning_rate": 4.4955846936044954e-07, "loss": 0.4824, "step": 4114 }, { "epoch": 0.5505753278030506, "grad_norm": 41.62315368652344, "learning_rate": 4.494246721969494e-07, "loss": 0.6667, "step": 4115 }, { "epoch": 0.5507091249665507, "grad_norm": 18.03438949584961, "learning_rate": 4.4929087503344924e-07, "loss": 0.3905, "step": 4116 }, { "epoch": 0.5508429221300508, "grad_norm": 14.948988914489746, "learning_rate": 4.491570778699492e-07, "loss": 0.2225, "step": 4117 }, { "epoch": 0.550976719293551, "grad_norm": 19.564136505126953, "learning_rate": 4.49023280706449e-07, "loss": 0.3244, "step": 4118 }, { "epoch": 0.5511105164570511, "grad_norm": 13.691413879394531, "learning_rate": 4.488894835429489e-07, "loss": 0.3826, "step": 4119 }, { "epoch": 0.5512443136205513, "grad_norm": 14.143356323242188, "learning_rate": 4.487556863794487e-07, "loss": 0.4412, "step": 4120 }, { "epoch": 0.5513781107840514, "grad_norm": 17.943443298339844, "learning_rate": 4.4862188921594864e-07, "loss": 0.4807, "step": 4121 }, { "epoch": 0.5515119079475516, "grad_norm": 28.09034538269043, "learning_rate": 4.4848809205244847e-07, "loss": 0.4641, "step": 4122 }, { "epoch": 0.5516457051110516, "grad_norm": 13.268622398376465, "learning_rate": 4.4835429488894835e-07, "loss": 0.2977, "step": 4123 }, { "epoch": 0.5517795022745517, "grad_norm": 18.30387306213379, "learning_rate": 4.4822049772544817e-07, "loss": 0.3874, "step": 4124 }, { "epoch": 0.5519132994380519, "grad_norm": 31.279926300048828, "learning_rate": 4.480867005619481e-07, "loss": 0.5325, "step": 4125 }, { "epoch": 0.552047096601552, "grad_norm": 13.541215896606445, "learning_rate": 4.4795290339844793e-07, "loss": 0.2455, "step": 4126 }, { "epoch": 0.5521808937650522, "grad_norm": 42.24747085571289, "learning_rate": 4.478191062349478e-07, "loss": 0.6508, "step": 4127 }, { "epoch": 0.5523146909285523, "grad_norm": 29.37230682373047, "learning_rate": 4.476853090714477e-07, "loss": 0.3566, "step": 4128 }, { "epoch": 0.5524484880920525, "grad_norm": 12.368042945861816, "learning_rate": 4.4755151190794757e-07, "loss": 0.3582, "step": 4129 }, { "epoch": 0.5525822852555525, "grad_norm": 19.87269401550293, "learning_rate": 4.474177147444474e-07, "loss": 0.2486, "step": 4130 }, { "epoch": 0.5527160824190527, "grad_norm": 20.178728103637695, "learning_rate": 4.472839175809472e-07, "loss": 0.2191, "step": 4131 }, { "epoch": 0.5528498795825528, "grad_norm": 35.41816329956055, "learning_rate": 4.4715012041744715e-07, "loss": 0.5016, "step": 4132 }, { "epoch": 0.552983676746053, "grad_norm": 28.545190811157227, "learning_rate": 4.47016323253947e-07, "loss": 0.2799, "step": 4133 }, { "epoch": 0.5531174739095531, "grad_norm": 43.3316764831543, "learning_rate": 4.4688252609044686e-07, "loss": 0.5626, "step": 4134 }, { "epoch": 0.5532512710730533, "grad_norm": 27.602731704711914, "learning_rate": 4.467487289269467e-07, "loss": 0.3845, "step": 4135 }, { "epoch": 0.5533850682365534, "grad_norm": 16.47417449951172, "learning_rate": 4.466149317634466e-07, "loss": 0.2842, "step": 4136 }, { "epoch": 0.5535188654000536, "grad_norm": 36.30484390258789, "learning_rate": 4.4648113459994644e-07, "loss": 0.5977, "step": 4137 }, { "epoch": 0.5536526625635536, "grad_norm": 34.262325286865234, "learning_rate": 4.463473374364463e-07, "loss": 0.6881, "step": 4138 }, { "epoch": 0.5537864597270538, "grad_norm": 36.18874740600586, "learning_rate": 4.462135402729462e-07, "loss": 0.416, "step": 4139 }, { "epoch": 0.5539202568905539, "grad_norm": 20.88640785217285, "learning_rate": 4.460797431094461e-07, "loss": 0.4741, "step": 4140 }, { "epoch": 0.5540540540540541, "grad_norm": 26.63558006286621, "learning_rate": 4.459459459459459e-07, "loss": 0.2646, "step": 4141 }, { "epoch": 0.5541878512175542, "grad_norm": 33.38965606689453, "learning_rate": 4.458121487824458e-07, "loss": 0.2859, "step": 4142 }, { "epoch": 0.5543216483810544, "grad_norm": 25.787307739257812, "learning_rate": 4.4567835161894567e-07, "loss": 0.4146, "step": 4143 }, { "epoch": 0.5544554455445545, "grad_norm": 19.235918045043945, "learning_rate": 4.4554455445544555e-07, "loss": 0.4629, "step": 4144 }, { "epoch": 0.5545892427080545, "grad_norm": 25.30014419555664, "learning_rate": 4.4541075729194537e-07, "loss": 0.445, "step": 4145 }, { "epoch": 0.5547230398715547, "grad_norm": 23.15123176574707, "learning_rate": 4.4527696012844525e-07, "loss": 0.4759, "step": 4146 }, { "epoch": 0.5548568370350548, "grad_norm": 21.935470581054688, "learning_rate": 4.4514316296494513e-07, "loss": 0.2956, "step": 4147 }, { "epoch": 0.554990634198555, "grad_norm": 27.881357192993164, "learning_rate": 4.45009365801445e-07, "loss": 0.3489, "step": 4148 }, { "epoch": 0.5551244313620551, "grad_norm": 20.863191604614258, "learning_rate": 4.4487556863794484e-07, "loss": 0.3673, "step": 4149 }, { "epoch": 0.5552582285255553, "grad_norm": 33.603065490722656, "learning_rate": 4.4474177147444477e-07, "loss": 0.3508, "step": 4150 }, { "epoch": 0.5553920256890554, "grad_norm": 28.081506729125977, "learning_rate": 4.446079743109446e-07, "loss": 0.5816, "step": 4151 }, { "epoch": 0.5555258228525555, "grad_norm": 25.654993057250977, "learning_rate": 4.444741771474445e-07, "loss": 0.5123, "step": 4152 }, { "epoch": 0.5556596200160556, "grad_norm": 31.176563262939453, "learning_rate": 4.443403799839443e-07, "loss": 0.5718, "step": 4153 }, { "epoch": 0.5557934171795558, "grad_norm": 26.57720375061035, "learning_rate": 4.4420658282044423e-07, "loss": 0.4929, "step": 4154 }, { "epoch": 0.5559272143430559, "grad_norm": 28.97879409790039, "learning_rate": 4.4407278565694406e-07, "loss": 0.3901, "step": 4155 }, { "epoch": 0.5560610115065561, "grad_norm": 34.984981536865234, "learning_rate": 4.439389884934439e-07, "loss": 0.4588, "step": 4156 }, { "epoch": 0.5561948086700562, "grad_norm": 41.20970153808594, "learning_rate": 4.4380519132994377e-07, "loss": 0.4602, "step": 4157 }, { "epoch": 0.5563286058335564, "grad_norm": 19.76811981201172, "learning_rate": 4.4367139416644364e-07, "loss": 0.2925, "step": 4158 }, { "epoch": 0.5564624029970565, "grad_norm": 31.11528205871582, "learning_rate": 4.435375970029435e-07, "loss": 0.3321, "step": 4159 }, { "epoch": 0.5565962001605566, "grad_norm": 14.438963890075684, "learning_rate": 4.4340379983944335e-07, "loss": 0.2458, "step": 4160 }, { "epoch": 0.5567299973240567, "grad_norm": 26.518112182617188, "learning_rate": 4.432700026759433e-07, "loss": 0.4315, "step": 4161 }, { "epoch": 0.5568637944875569, "grad_norm": 22.490440368652344, "learning_rate": 4.431362055124431e-07, "loss": 0.3359, "step": 4162 }, { "epoch": 0.556997591651057, "grad_norm": 23.759958267211914, "learning_rate": 4.43002408348943e-07, "loss": 0.3465, "step": 4163 }, { "epoch": 0.5571313888145571, "grad_norm": 31.497482299804688, "learning_rate": 4.428686111854428e-07, "loss": 0.3141, "step": 4164 }, { "epoch": 0.5572651859780573, "grad_norm": 45.492733001708984, "learning_rate": 4.4273481402194275e-07, "loss": 0.532, "step": 4165 }, { "epoch": 0.5573989831415574, "grad_norm": 25.78080177307129, "learning_rate": 4.4260101685844257e-07, "loss": 0.4014, "step": 4166 }, { "epoch": 0.5575327803050575, "grad_norm": 44.78652572631836, "learning_rate": 4.4246721969494245e-07, "loss": 0.5743, "step": 4167 }, { "epoch": 0.5576665774685576, "grad_norm": 17.882156372070312, "learning_rate": 4.423334225314423e-07, "loss": 0.3244, "step": 4168 }, { "epoch": 0.5578003746320578, "grad_norm": 24.41230583190918, "learning_rate": 4.421996253679422e-07, "loss": 0.4932, "step": 4169 }, { "epoch": 0.5579341717955579, "grad_norm": 34.59474563598633, "learning_rate": 4.4206582820444204e-07, "loss": 0.5104, "step": 4170 }, { "epoch": 0.5580679689590581, "grad_norm": 15.270916938781738, "learning_rate": 4.419320310409419e-07, "loss": 0.3901, "step": 4171 }, { "epoch": 0.5582017661225582, "grad_norm": 27.735004425048828, "learning_rate": 4.4179823387744174e-07, "loss": 0.3265, "step": 4172 }, { "epoch": 0.5583355632860584, "grad_norm": 11.609271049499512, "learning_rate": 4.416644367139417e-07, "loss": 0.3104, "step": 4173 }, { "epoch": 0.5584693604495584, "grad_norm": 30.074962615966797, "learning_rate": 4.415306395504415e-07, "loss": 0.5996, "step": 4174 }, { "epoch": 0.5586031576130586, "grad_norm": 43.91603088378906, "learning_rate": 4.413968423869414e-07, "loss": 0.709, "step": 4175 }, { "epoch": 0.5587369547765587, "grad_norm": 35.16593933105469, "learning_rate": 4.4126304522344126e-07, "loss": 0.3864, "step": 4176 }, { "epoch": 0.5588707519400589, "grad_norm": 28.426673889160156, "learning_rate": 4.4112924805994114e-07, "loss": 0.3446, "step": 4177 }, { "epoch": 0.559004549103559, "grad_norm": 32.84012222290039, "learning_rate": 4.4099545089644097e-07, "loss": 0.3971, "step": 4178 }, { "epoch": 0.5591383462670592, "grad_norm": 48.261470794677734, "learning_rate": 4.4086165373294084e-07, "loss": 0.4637, "step": 4179 }, { "epoch": 0.5592721434305593, "grad_norm": 21.88231086730957, "learning_rate": 4.407278565694407e-07, "loss": 0.6201, "step": 4180 }, { "epoch": 0.5594059405940595, "grad_norm": 47.744903564453125, "learning_rate": 4.405940594059406e-07, "loss": 0.3736, "step": 4181 }, { "epoch": 0.5595397377575595, "grad_norm": 22.7204532623291, "learning_rate": 4.4046026224244043e-07, "loss": 0.4394, "step": 4182 }, { "epoch": 0.5596735349210596, "grad_norm": 23.441879272460938, "learning_rate": 4.4032646507894026e-07, "loss": 0.4114, "step": 4183 }, { "epoch": 0.5598073320845598, "grad_norm": 33.99053192138672, "learning_rate": 4.401926679154402e-07, "loss": 0.6068, "step": 4184 }, { "epoch": 0.5599411292480599, "grad_norm": 16.235309600830078, "learning_rate": 4.4005887075194e-07, "loss": 0.3123, "step": 4185 }, { "epoch": 0.5600749264115601, "grad_norm": 27.30575180053711, "learning_rate": 4.399250735884399e-07, "loss": 0.462, "step": 4186 }, { "epoch": 0.5602087235750602, "grad_norm": 23.399372100830078, "learning_rate": 4.3979127642493977e-07, "loss": 0.5349, "step": 4187 }, { "epoch": 0.5603425207385604, "grad_norm": 17.523609161376953, "learning_rate": 4.3965747926143965e-07, "loss": 0.4253, "step": 4188 }, { "epoch": 0.5604763179020604, "grad_norm": 19.283870697021484, "learning_rate": 4.395236820979395e-07, "loss": 0.2942, "step": 4189 }, { "epoch": 0.5606101150655606, "grad_norm": 22.238149642944336, "learning_rate": 4.3938988493443936e-07, "loss": 0.3331, "step": 4190 }, { "epoch": 0.5607439122290607, "grad_norm": 20.57177734375, "learning_rate": 4.3925608777093924e-07, "loss": 0.3679, "step": 4191 }, { "epoch": 0.5608777093925609, "grad_norm": 18.397253036499023, "learning_rate": 4.391222906074391e-07, "loss": 0.3803, "step": 4192 }, { "epoch": 0.561011506556061, "grad_norm": 21.809139251708984, "learning_rate": 4.3898849344393894e-07, "loss": 0.3874, "step": 4193 }, { "epoch": 0.5611453037195612, "grad_norm": 18.131528854370117, "learning_rate": 4.388546962804388e-07, "loss": 0.2557, "step": 4194 }, { "epoch": 0.5612791008830613, "grad_norm": 22.618789672851562, "learning_rate": 4.387208991169387e-07, "loss": 0.2817, "step": 4195 }, { "epoch": 0.5614128980465614, "grad_norm": 16.22306251525879, "learning_rate": 4.385871019534386e-07, "loss": 0.4641, "step": 4196 }, { "epoch": 0.5615466952100615, "grad_norm": 31.463808059692383, "learning_rate": 4.384533047899384e-07, "loss": 0.5386, "step": 4197 }, { "epoch": 0.5616804923735617, "grad_norm": 15.863655090332031, "learning_rate": 4.3831950762643834e-07, "loss": 0.4112, "step": 4198 }, { "epoch": 0.5618142895370618, "grad_norm": 30.963104248046875, "learning_rate": 4.3818571046293817e-07, "loss": 0.521, "step": 4199 }, { "epoch": 0.561948086700562, "grad_norm": 29.591684341430664, "learning_rate": 4.3805191329943804e-07, "loss": 0.3507, "step": 4200 }, { "epoch": 0.5620818838640621, "grad_norm": 22.613439559936523, "learning_rate": 4.3791811613593787e-07, "loss": 0.4891, "step": 4201 }, { "epoch": 0.5622156810275623, "grad_norm": 37.50221633911133, "learning_rate": 4.377843189724378e-07, "loss": 0.45, "step": 4202 }, { "epoch": 0.5623494781910624, "grad_norm": 30.67543601989746, "learning_rate": 4.3765052180893763e-07, "loss": 0.4607, "step": 4203 }, { "epoch": 0.5624832753545624, "grad_norm": 17.685014724731445, "learning_rate": 4.375167246454375e-07, "loss": 0.1531, "step": 4204 }, { "epoch": 0.5626170725180626, "grad_norm": 26.06410026550293, "learning_rate": 4.3738292748193733e-07, "loss": 0.2767, "step": 4205 }, { "epoch": 0.5627508696815627, "grad_norm": 22.7185001373291, "learning_rate": 4.3724913031843727e-07, "loss": 0.5406, "step": 4206 }, { "epoch": 0.5628846668450629, "grad_norm": 21.302608489990234, "learning_rate": 4.371153331549371e-07, "loss": 0.326, "step": 4207 }, { "epoch": 0.563018464008563, "grad_norm": 16.673620223999023, "learning_rate": 4.369815359914369e-07, "loss": 0.3015, "step": 4208 }, { "epoch": 0.5631522611720632, "grad_norm": 25.436372756958008, "learning_rate": 4.3684773882793685e-07, "loss": 0.4042, "step": 4209 }, { "epoch": 0.5632860583355633, "grad_norm": 28.99821662902832, "learning_rate": 4.367139416644367e-07, "loss": 0.3085, "step": 4210 }, { "epoch": 0.5634198554990634, "grad_norm": 23.837566375732422, "learning_rate": 4.3658014450093656e-07, "loss": 0.5045, "step": 4211 }, { "epoch": 0.5635536526625635, "grad_norm": 45.376182556152344, "learning_rate": 4.364463473374364e-07, "loss": 0.554, "step": 4212 }, { "epoch": 0.5636874498260637, "grad_norm": 22.561567306518555, "learning_rate": 4.363125501739363e-07, "loss": 0.3663, "step": 4213 }, { "epoch": 0.5638212469895638, "grad_norm": 50.6370849609375, "learning_rate": 4.3617875301043614e-07, "loss": 0.6287, "step": 4214 }, { "epoch": 0.563955044153064, "grad_norm": 50.439395904541016, "learning_rate": 4.36044955846936e-07, "loss": 0.3696, "step": 4215 }, { "epoch": 0.5640888413165641, "grad_norm": 44.98612594604492, "learning_rate": 4.3591115868343585e-07, "loss": 0.4377, "step": 4216 }, { "epoch": 0.5642226384800643, "grad_norm": 16.755611419677734, "learning_rate": 4.357773615199358e-07, "loss": 0.3165, "step": 4217 }, { "epoch": 0.5643564356435643, "grad_norm": 37.08945846557617, "learning_rate": 4.356435643564356e-07, "loss": 0.3442, "step": 4218 }, { "epoch": 0.5644902328070645, "grad_norm": 26.60102653503418, "learning_rate": 4.355097671929355e-07, "loss": 0.3563, "step": 4219 }, { "epoch": 0.5646240299705646, "grad_norm": 23.209352493286133, "learning_rate": 4.3537597002943537e-07, "loss": 0.5105, "step": 4220 }, { "epoch": 0.5647578271340647, "grad_norm": 32.39219665527344, "learning_rate": 4.3524217286593524e-07, "loss": 0.3913, "step": 4221 }, { "epoch": 0.5648916242975649, "grad_norm": 32.56468200683594, "learning_rate": 4.3510837570243507e-07, "loss": 0.5314, "step": 4222 }, { "epoch": 0.565025421461065, "grad_norm": 20.773845672607422, "learning_rate": 4.3497457853893495e-07, "loss": 0.3728, "step": 4223 }, { "epoch": 0.5651592186245652, "grad_norm": 18.986328125, "learning_rate": 4.3484078137543483e-07, "loss": 0.2336, "step": 4224 }, { "epoch": 0.5652930157880653, "grad_norm": 35.82231521606445, "learning_rate": 4.347069842119347e-07, "loss": 0.6526, "step": 4225 }, { "epoch": 0.5654268129515654, "grad_norm": 18.890295028686523, "learning_rate": 4.3457318704843453e-07, "loss": 0.5287, "step": 4226 }, { "epoch": 0.5655606101150655, "grad_norm": 25.84481430053711, "learning_rate": 4.344393898849344e-07, "loss": 0.4409, "step": 4227 }, { "epoch": 0.5656944072785657, "grad_norm": 21.980487823486328, "learning_rate": 4.343055927214343e-07, "loss": 0.4841, "step": 4228 }, { "epoch": 0.5658282044420658, "grad_norm": 25.282062530517578, "learning_rate": 4.3417179555793417e-07, "loss": 0.5286, "step": 4229 }, { "epoch": 0.565962001605566, "grad_norm": 42.677696228027344, "learning_rate": 4.34037998394434e-07, "loss": 0.4657, "step": 4230 }, { "epoch": 0.5660957987690661, "grad_norm": 33.77067565917969, "learning_rate": 4.3390420123093393e-07, "loss": 0.6885, "step": 4231 }, { "epoch": 0.5662295959325663, "grad_norm": 23.320297241210938, "learning_rate": 4.3377040406743376e-07, "loss": 0.4535, "step": 4232 }, { "epoch": 0.5663633930960663, "grad_norm": 15.366232872009277, "learning_rate": 4.3363660690393364e-07, "loss": 0.3743, "step": 4233 }, { "epoch": 0.5664971902595665, "grad_norm": 38.11214065551758, "learning_rate": 4.3350280974043346e-07, "loss": 0.3942, "step": 4234 }, { "epoch": 0.5666309874230666, "grad_norm": 19.096590042114258, "learning_rate": 4.3336901257693334e-07, "loss": 0.3935, "step": 4235 }, { "epoch": 0.5667647845865668, "grad_norm": 25.464391708374023, "learning_rate": 4.332352154134332e-07, "loss": 0.5394, "step": 4236 }, { "epoch": 0.5668985817500669, "grad_norm": 19.44609260559082, "learning_rate": 4.3310141824993305e-07, "loss": 0.3755, "step": 4237 }, { "epoch": 0.5670323789135671, "grad_norm": 42.83562469482422, "learning_rate": 4.3296762108643293e-07, "loss": 0.441, "step": 4238 }, { "epoch": 0.5671661760770672, "grad_norm": 21.66522979736328, "learning_rate": 4.328338239229328e-07, "loss": 0.3821, "step": 4239 }, { "epoch": 0.5672999732405672, "grad_norm": 18.614362716674805, "learning_rate": 4.327000267594327e-07, "loss": 0.3516, "step": 4240 }, { "epoch": 0.5674337704040674, "grad_norm": 26.794322967529297, "learning_rate": 4.325662295959325e-07, "loss": 0.2505, "step": 4241 }, { "epoch": 0.5675675675675675, "grad_norm": 22.024681091308594, "learning_rate": 4.3243243243243244e-07, "loss": 0.4945, "step": 4242 }, { "epoch": 0.5677013647310677, "grad_norm": 17.758996963500977, "learning_rate": 4.3229863526893227e-07, "loss": 0.3523, "step": 4243 }, { "epoch": 0.5678351618945678, "grad_norm": 44.21141052246094, "learning_rate": 4.3216483810543215e-07, "loss": 0.6878, "step": 4244 }, { "epoch": 0.567968959058068, "grad_norm": 34.3513298034668, "learning_rate": 4.32031040941932e-07, "loss": 0.546, "step": 4245 }, { "epoch": 0.5681027562215681, "grad_norm": 30.7362060546875, "learning_rate": 4.318972437784319e-07, "loss": 0.6448, "step": 4246 }, { "epoch": 0.5682365533850683, "grad_norm": 27.89305877685547, "learning_rate": 4.3176344661493173e-07, "loss": 0.4851, "step": 4247 }, { "epoch": 0.5683703505485683, "grad_norm": 18.716337203979492, "learning_rate": 4.316296494514316e-07, "loss": 0.2995, "step": 4248 }, { "epoch": 0.5685041477120685, "grad_norm": 23.377779006958008, "learning_rate": 4.3149585228793144e-07, "loss": 0.5675, "step": 4249 }, { "epoch": 0.5686379448755686, "grad_norm": 18.581735610961914, "learning_rate": 4.3136205512443137e-07, "loss": 0.4856, "step": 4250 }, { "epoch": 0.5687717420390688, "grad_norm": 36.3592414855957, "learning_rate": 4.312282579609312e-07, "loss": 0.6946, "step": 4251 }, { "epoch": 0.5689055392025689, "grad_norm": 20.445770263671875, "learning_rate": 4.310944607974311e-07, "loss": 0.3243, "step": 4252 }, { "epoch": 0.5690393363660691, "grad_norm": 28.840229034423828, "learning_rate": 4.3096066363393096e-07, "loss": 0.4294, "step": 4253 }, { "epoch": 0.5691731335295692, "grad_norm": 12.770272254943848, "learning_rate": 4.3082686647043084e-07, "loss": 0.3076, "step": 4254 }, { "epoch": 0.5693069306930693, "grad_norm": 25.758563995361328, "learning_rate": 4.3069306930693066e-07, "loss": 0.4214, "step": 4255 }, { "epoch": 0.5694407278565694, "grad_norm": 25.241987228393555, "learning_rate": 4.3055927214343054e-07, "loss": 0.2795, "step": 4256 }, { "epoch": 0.5695745250200696, "grad_norm": 24.837970733642578, "learning_rate": 4.304254749799304e-07, "loss": 0.4202, "step": 4257 }, { "epoch": 0.5697083221835697, "grad_norm": 29.75922966003418, "learning_rate": 4.302916778164303e-07, "loss": 0.4532, "step": 4258 }, { "epoch": 0.5698421193470699, "grad_norm": 16.66409683227539, "learning_rate": 4.3015788065293013e-07, "loss": 0.2613, "step": 4259 }, { "epoch": 0.56997591651057, "grad_norm": 28.185556411743164, "learning_rate": 4.3002408348942995e-07, "loss": 0.3726, "step": 4260 }, { "epoch": 0.5701097136740702, "grad_norm": 35.366756439208984, "learning_rate": 4.298902863259299e-07, "loss": 0.5084, "step": 4261 }, { "epoch": 0.5702435108375702, "grad_norm": 24.322166442871094, "learning_rate": 4.297564891624297e-07, "loss": 0.6375, "step": 4262 }, { "epoch": 0.5703773080010703, "grad_norm": 19.412328720092773, "learning_rate": 4.296226919989296e-07, "loss": 0.4759, "step": 4263 }, { "epoch": 0.5705111051645705, "grad_norm": 32.23774337768555, "learning_rate": 4.2948889483542947e-07, "loss": 0.4479, "step": 4264 }, { "epoch": 0.5706449023280706, "grad_norm": 43.290836334228516, "learning_rate": 4.2935509767192935e-07, "loss": 0.645, "step": 4265 }, { "epoch": 0.5707786994915708, "grad_norm": 37.18659591674805, "learning_rate": 4.292213005084292e-07, "loss": 0.391, "step": 4266 }, { "epoch": 0.5709124966550709, "grad_norm": 52.6976432800293, "learning_rate": 4.2908750334492906e-07, "loss": 0.5115, "step": 4267 }, { "epoch": 0.5710462938185711, "grad_norm": 22.760522842407227, "learning_rate": 4.2895370618142893e-07, "loss": 0.4068, "step": 4268 }, { "epoch": 0.5711800909820712, "grad_norm": 23.3000431060791, "learning_rate": 4.288199090179288e-07, "loss": 0.4455, "step": 4269 }, { "epoch": 0.5713138881455713, "grad_norm": 24.146896362304688, "learning_rate": 4.2868611185442864e-07, "loss": 0.369, "step": 4270 }, { "epoch": 0.5714476853090714, "grad_norm": 29.493104934692383, "learning_rate": 4.285523146909285e-07, "loss": 0.5266, "step": 4271 }, { "epoch": 0.5715814824725716, "grad_norm": 30.333026885986328, "learning_rate": 4.284185175274284e-07, "loss": 0.5576, "step": 4272 }, { "epoch": 0.5717152796360717, "grad_norm": 23.114437103271484, "learning_rate": 4.282847203639283e-07, "loss": 0.4536, "step": 4273 }, { "epoch": 0.5718490767995719, "grad_norm": 35.182498931884766, "learning_rate": 4.281509232004281e-07, "loss": 0.5979, "step": 4274 }, { "epoch": 0.571982873963072, "grad_norm": 16.594890594482422, "learning_rate": 4.2801712603692804e-07, "loss": 0.4375, "step": 4275 }, { "epoch": 0.5721166711265722, "grad_norm": 16.942373275756836, "learning_rate": 4.2788332887342786e-07, "loss": 0.3291, "step": 4276 }, { "epoch": 0.5722504682900722, "grad_norm": 31.805496215820312, "learning_rate": 4.2774953170992774e-07, "loss": 0.5385, "step": 4277 }, { "epoch": 0.5723842654535723, "grad_norm": 33.07448959350586, "learning_rate": 4.2761573454642757e-07, "loss": 0.4459, "step": 4278 }, { "epoch": 0.5725180626170725, "grad_norm": 18.429513931274414, "learning_rate": 4.274819373829275e-07, "loss": 0.3789, "step": 4279 }, { "epoch": 0.5726518597805726, "grad_norm": 25.617231369018555, "learning_rate": 4.2734814021942733e-07, "loss": 0.3155, "step": 4280 }, { "epoch": 0.5727856569440728, "grad_norm": 55.98066329956055, "learning_rate": 4.272143430559272e-07, "loss": 0.7272, "step": 4281 }, { "epoch": 0.572919454107573, "grad_norm": 16.131006240844727, "learning_rate": 4.2708054589242703e-07, "loss": 0.51, "step": 4282 }, { "epoch": 0.5730532512710731, "grad_norm": 32.08684539794922, "learning_rate": 4.2694674872892697e-07, "loss": 0.4151, "step": 4283 }, { "epoch": 0.5731870484345731, "grad_norm": 44.2843132019043, "learning_rate": 4.268129515654268e-07, "loss": 0.6448, "step": 4284 }, { "epoch": 0.5733208455980733, "grad_norm": 23.124141693115234, "learning_rate": 4.2667915440192667e-07, "loss": 0.4265, "step": 4285 }, { "epoch": 0.5734546427615734, "grad_norm": 13.969578742980957, "learning_rate": 4.2654535723842655e-07, "loss": 0.2558, "step": 4286 }, { "epoch": 0.5735884399250736, "grad_norm": 25.191526412963867, "learning_rate": 4.264115600749264e-07, "loss": 0.3631, "step": 4287 }, { "epoch": 0.5737222370885737, "grad_norm": 23.2822322845459, "learning_rate": 4.2627776291142626e-07, "loss": 0.4426, "step": 4288 }, { "epoch": 0.5738560342520739, "grad_norm": 36.94560241699219, "learning_rate": 4.261439657479261e-07, "loss": 0.7689, "step": 4289 }, { "epoch": 0.573989831415574, "grad_norm": 23.658615112304688, "learning_rate": 4.26010168584426e-07, "loss": 0.3185, "step": 4290 }, { "epoch": 0.5741236285790742, "grad_norm": 34.708370208740234, "learning_rate": 4.2587637142092584e-07, "loss": 0.5412, "step": 4291 }, { "epoch": 0.5742574257425742, "grad_norm": 16.332721710205078, "learning_rate": 4.257425742574257e-07, "loss": 0.3023, "step": 4292 }, { "epoch": 0.5743912229060744, "grad_norm": 19.695343017578125, "learning_rate": 4.2560877709392555e-07, "loss": 0.4069, "step": 4293 }, { "epoch": 0.5745250200695745, "grad_norm": 31.477815628051758, "learning_rate": 4.254749799304255e-07, "loss": 0.7677, "step": 4294 }, { "epoch": 0.5746588172330747, "grad_norm": 21.629955291748047, "learning_rate": 4.253411827669253e-07, "loss": 0.4472, "step": 4295 }, { "epoch": 0.5747926143965748, "grad_norm": 35.976871490478516, "learning_rate": 4.252073856034252e-07, "loss": 0.4365, "step": 4296 }, { "epoch": 0.574926411560075, "grad_norm": 21.988361358642578, "learning_rate": 4.2507358843992506e-07, "loss": 0.5636, "step": 4297 }, { "epoch": 0.5750602087235751, "grad_norm": 35.24331283569336, "learning_rate": 4.2493979127642494e-07, "loss": 0.3978, "step": 4298 }, { "epoch": 0.5751940058870751, "grad_norm": 42.553836822509766, "learning_rate": 4.2480599411292477e-07, "loss": 0.3826, "step": 4299 }, { "epoch": 0.5753278030505753, "grad_norm": 22.7469539642334, "learning_rate": 4.2467219694942465e-07, "loss": 0.5462, "step": 4300 }, { "epoch": 0.5754616002140754, "grad_norm": 25.951311111450195, "learning_rate": 4.2453839978592453e-07, "loss": 0.5168, "step": 4301 }, { "epoch": 0.5755953973775756, "grad_norm": 14.941669464111328, "learning_rate": 4.244046026224244e-07, "loss": 0.3084, "step": 4302 }, { "epoch": 0.5757291945410757, "grad_norm": 19.98797035217285, "learning_rate": 4.2427080545892423e-07, "loss": 0.4666, "step": 4303 }, { "epoch": 0.5758629917045759, "grad_norm": 28.369470596313477, "learning_rate": 4.241370082954241e-07, "loss": 0.4172, "step": 4304 }, { "epoch": 0.575996788868076, "grad_norm": 31.79190444946289, "learning_rate": 4.24003211131924e-07, "loss": 0.5018, "step": 4305 }, { "epoch": 0.5761305860315761, "grad_norm": 26.200637817382812, "learning_rate": 4.2386941396842387e-07, "loss": 0.634, "step": 4306 }, { "epoch": 0.5762643831950762, "grad_norm": 31.04981803894043, "learning_rate": 4.237356168049237e-07, "loss": 0.7439, "step": 4307 }, { "epoch": 0.5763981803585764, "grad_norm": 21.672517776489258, "learning_rate": 4.2360181964142363e-07, "loss": 0.3027, "step": 4308 }, { "epoch": 0.5765319775220765, "grad_norm": 18.10089874267578, "learning_rate": 4.2346802247792346e-07, "loss": 0.4176, "step": 4309 }, { "epoch": 0.5766657746855767, "grad_norm": 28.18654441833496, "learning_rate": 4.2333422531442333e-07, "loss": 0.6374, "step": 4310 }, { "epoch": 0.5767995718490768, "grad_norm": 16.63274383544922, "learning_rate": 4.2320042815092316e-07, "loss": 0.2413, "step": 4311 }, { "epoch": 0.576933369012577, "grad_norm": 22.85098648071289, "learning_rate": 4.230666309874231e-07, "loss": 0.5129, "step": 4312 }, { "epoch": 0.5770671661760771, "grad_norm": 19.291603088378906, "learning_rate": 4.229328338239229e-07, "loss": 0.4872, "step": 4313 }, { "epoch": 0.5772009633395772, "grad_norm": 22.75650978088379, "learning_rate": 4.2279903666042275e-07, "loss": 0.3822, "step": 4314 }, { "epoch": 0.5773347605030773, "grad_norm": 25.34139060974121, "learning_rate": 4.226652394969226e-07, "loss": 0.3556, "step": 4315 }, { "epoch": 0.5774685576665775, "grad_norm": 10.408072471618652, "learning_rate": 4.225314423334225e-07, "loss": 0.2395, "step": 4316 }, { "epoch": 0.5776023548300776, "grad_norm": 18.66485023498535, "learning_rate": 4.223976451699224e-07, "loss": 0.422, "step": 4317 }, { "epoch": 0.5777361519935778, "grad_norm": 15.439923286437988, "learning_rate": 4.222638480064222e-07, "loss": 0.4598, "step": 4318 }, { "epoch": 0.5778699491570779, "grad_norm": 16.987070083618164, "learning_rate": 4.2213005084292214e-07, "loss": 0.2319, "step": 4319 }, { "epoch": 0.578003746320578, "grad_norm": 32.31244659423828, "learning_rate": 4.2199625367942197e-07, "loss": 0.5228, "step": 4320 }, { "epoch": 0.5781375434840781, "grad_norm": 17.05211067199707, "learning_rate": 4.2186245651592185e-07, "loss": 0.2691, "step": 4321 }, { "epoch": 0.5782713406475782, "grad_norm": 12.594517707824707, "learning_rate": 4.217286593524217e-07, "loss": 0.3284, "step": 4322 }, { "epoch": 0.5784051378110784, "grad_norm": 17.818300247192383, "learning_rate": 4.215948621889216e-07, "loss": 0.2496, "step": 4323 }, { "epoch": 0.5785389349745785, "grad_norm": 48.78641891479492, "learning_rate": 4.2146106502542143e-07, "loss": 0.7955, "step": 4324 }, { "epoch": 0.5786727321380787, "grad_norm": 25.1239070892334, "learning_rate": 4.213272678619213e-07, "loss": 0.6159, "step": 4325 }, { "epoch": 0.5788065293015788, "grad_norm": 30.195144653320312, "learning_rate": 4.2119347069842114e-07, "loss": 0.5105, "step": 4326 }, { "epoch": 0.578940326465079, "grad_norm": 25.42458152770996, "learning_rate": 4.2105967353492107e-07, "loss": 0.4333, "step": 4327 }, { "epoch": 0.5790741236285791, "grad_norm": 15.157245635986328, "learning_rate": 4.209258763714209e-07, "loss": 0.3839, "step": 4328 }, { "epoch": 0.5792079207920792, "grad_norm": 27.85223388671875, "learning_rate": 4.207920792079208e-07, "loss": 0.3284, "step": 4329 }, { "epoch": 0.5793417179555793, "grad_norm": 36.77550506591797, "learning_rate": 4.2065828204442066e-07, "loss": 0.3204, "step": 4330 }, { "epoch": 0.5794755151190795, "grad_norm": 26.25826644897461, "learning_rate": 4.2052448488092053e-07, "loss": 0.4772, "step": 4331 }, { "epoch": 0.5796093122825796, "grad_norm": 19.70702362060547, "learning_rate": 4.2039068771742036e-07, "loss": 0.409, "step": 4332 }, { "epoch": 0.5797431094460798, "grad_norm": 26.727880477905273, "learning_rate": 4.2025689055392024e-07, "loss": 0.4972, "step": 4333 }, { "epoch": 0.5798769066095799, "grad_norm": 44.88861083984375, "learning_rate": 4.201230933904201e-07, "loss": 0.5519, "step": 4334 }, { "epoch": 0.5800107037730801, "grad_norm": 29.104686737060547, "learning_rate": 4.1998929622692e-07, "loss": 0.4624, "step": 4335 }, { "epoch": 0.5801445009365801, "grad_norm": 16.828794479370117, "learning_rate": 4.198554990634198e-07, "loss": 0.263, "step": 4336 }, { "epoch": 0.5802782981000802, "grad_norm": 21.95220184326172, "learning_rate": 4.1972170189991965e-07, "loss": 0.4123, "step": 4337 }, { "epoch": 0.5804120952635804, "grad_norm": 25.956329345703125, "learning_rate": 4.195879047364196e-07, "loss": 0.5308, "step": 4338 }, { "epoch": 0.5805458924270805, "grad_norm": 24.64419937133789, "learning_rate": 4.194541075729194e-07, "loss": 0.3539, "step": 4339 }, { "epoch": 0.5806796895905807, "grad_norm": 32.26490783691406, "learning_rate": 4.193203104094193e-07, "loss": 0.3962, "step": 4340 }, { "epoch": 0.5808134867540808, "grad_norm": 26.327836990356445, "learning_rate": 4.1918651324591917e-07, "loss": 0.693, "step": 4341 }, { "epoch": 0.580947283917581, "grad_norm": 26.766630172729492, "learning_rate": 4.1905271608241905e-07, "loss": 0.4575, "step": 4342 }, { "epoch": 0.581081081081081, "grad_norm": 42.30158615112305, "learning_rate": 4.189189189189189e-07, "loss": 0.6102, "step": 4343 }, { "epoch": 0.5812148782445812, "grad_norm": 27.023630142211914, "learning_rate": 4.1878512175541875e-07, "loss": 0.4485, "step": 4344 }, { "epoch": 0.5813486754080813, "grad_norm": 45.497283935546875, "learning_rate": 4.1865132459191863e-07, "loss": 0.3847, "step": 4345 }, { "epoch": 0.5814824725715815, "grad_norm": 29.673755645751953, "learning_rate": 4.185175274284185e-07, "loss": 0.3742, "step": 4346 }, { "epoch": 0.5816162697350816, "grad_norm": 21.741744995117188, "learning_rate": 4.1838373026491834e-07, "loss": 0.5414, "step": 4347 }, { "epoch": 0.5817500668985818, "grad_norm": 34.76877212524414, "learning_rate": 4.182499331014182e-07, "loss": 0.5161, "step": 4348 }, { "epoch": 0.5818838640620819, "grad_norm": 31.732065200805664, "learning_rate": 4.181161359379181e-07, "loss": 0.2454, "step": 4349 }, { "epoch": 0.5820176612255821, "grad_norm": 18.093379974365234, "learning_rate": 4.17982338774418e-07, "loss": 0.3921, "step": 4350 }, { "epoch": 0.5821514583890821, "grad_norm": 38.524818420410156, "learning_rate": 4.178485416109178e-07, "loss": 0.5169, "step": 4351 }, { "epoch": 0.5822852555525823, "grad_norm": 37.48975372314453, "learning_rate": 4.1771474444741773e-07, "loss": 0.2835, "step": 4352 }, { "epoch": 0.5824190527160824, "grad_norm": 47.95796203613281, "learning_rate": 4.1758094728391756e-07, "loss": 0.3593, "step": 4353 }, { "epoch": 0.5825528498795826, "grad_norm": 22.703065872192383, "learning_rate": 4.1744715012041744e-07, "loss": 0.3561, "step": 4354 }, { "epoch": 0.5826866470430827, "grad_norm": 16.846208572387695, "learning_rate": 4.1731335295691727e-07, "loss": 0.3283, "step": 4355 }, { "epoch": 0.5828204442065829, "grad_norm": 31.840320587158203, "learning_rate": 4.171795557934172e-07, "loss": 0.4473, "step": 4356 }, { "epoch": 0.582954241370083, "grad_norm": 25.8057804107666, "learning_rate": 4.17045758629917e-07, "loss": 0.3924, "step": 4357 }, { "epoch": 0.583088038533583, "grad_norm": 25.53543472290039, "learning_rate": 4.169119614664169e-07, "loss": 0.4232, "step": 4358 }, { "epoch": 0.5832218356970832, "grad_norm": 32.1478157043457, "learning_rate": 4.1677816430291673e-07, "loss": 0.508, "step": 4359 }, { "epoch": 0.5833556328605833, "grad_norm": 32.67182159423828, "learning_rate": 4.1664436713941666e-07, "loss": 0.4717, "step": 4360 }, { "epoch": 0.5834894300240835, "grad_norm": 17.047948837280273, "learning_rate": 4.165105699759165e-07, "loss": 0.4202, "step": 4361 }, { "epoch": 0.5836232271875836, "grad_norm": 24.579191207885742, "learning_rate": 4.1637677281241637e-07, "loss": 0.5486, "step": 4362 }, { "epoch": 0.5837570243510838, "grad_norm": 26.678829193115234, "learning_rate": 4.1624297564891625e-07, "loss": 0.4377, "step": 4363 }, { "epoch": 0.5838908215145839, "grad_norm": 24.7497615814209, "learning_rate": 4.1610917848541613e-07, "loss": 0.3776, "step": 4364 }, { "epoch": 0.584024618678084, "grad_norm": 17.779014587402344, "learning_rate": 4.1597538132191595e-07, "loss": 0.3314, "step": 4365 }, { "epoch": 0.5841584158415841, "grad_norm": 21.745962142944336, "learning_rate": 4.158415841584158e-07, "loss": 0.4419, "step": 4366 }, { "epoch": 0.5842922130050843, "grad_norm": 18.35731315612793, "learning_rate": 4.157077869949157e-07, "loss": 0.4773, "step": 4367 }, { "epoch": 0.5844260101685844, "grad_norm": 18.578306198120117, "learning_rate": 4.1557398983141554e-07, "loss": 0.2668, "step": 4368 }, { "epoch": 0.5845598073320846, "grad_norm": 30.15928840637207, "learning_rate": 4.154401926679154e-07, "loss": 0.4821, "step": 4369 }, { "epoch": 0.5846936044955847, "grad_norm": 33.30975341796875, "learning_rate": 4.1530639550441524e-07, "loss": 0.7016, "step": 4370 }, { "epoch": 0.5848274016590849, "grad_norm": 29.122455596923828, "learning_rate": 4.151725983409152e-07, "loss": 0.3073, "step": 4371 }, { "epoch": 0.584961198822585, "grad_norm": 21.25771713256836, "learning_rate": 4.15038801177415e-07, "loss": 0.4449, "step": 4372 }, { "epoch": 0.585094995986085, "grad_norm": 27.67799949645996, "learning_rate": 4.149050040139149e-07, "loss": 0.5102, "step": 4373 }, { "epoch": 0.5852287931495852, "grad_norm": 11.911003112792969, "learning_rate": 4.1477120685041476e-07, "loss": 0.333, "step": 4374 }, { "epoch": 0.5853625903130854, "grad_norm": 40.62964630126953, "learning_rate": 4.1463740968691464e-07, "loss": 0.3076, "step": 4375 }, { "epoch": 0.5854963874765855, "grad_norm": 21.17774772644043, "learning_rate": 4.1450361252341447e-07, "loss": 0.5258, "step": 4376 }, { "epoch": 0.5856301846400856, "grad_norm": 18.144954681396484, "learning_rate": 4.1436981535991435e-07, "loss": 0.1234, "step": 4377 }, { "epoch": 0.5857639818035858, "grad_norm": 30.5235652923584, "learning_rate": 4.142360181964142e-07, "loss": 0.4266, "step": 4378 }, { "epoch": 0.585897778967086, "grad_norm": 18.14736557006836, "learning_rate": 4.141022210329141e-07, "loss": 0.3651, "step": 4379 }, { "epoch": 0.586031576130586, "grad_norm": 15.53046989440918, "learning_rate": 4.1396842386941393e-07, "loss": 0.3325, "step": 4380 }, { "epoch": 0.5861653732940861, "grad_norm": 17.760713577270508, "learning_rate": 4.138346267059138e-07, "loss": 0.2136, "step": 4381 }, { "epoch": 0.5862991704575863, "grad_norm": 30.161060333251953, "learning_rate": 4.137008295424137e-07, "loss": 0.367, "step": 4382 }, { "epoch": 0.5864329676210864, "grad_norm": 21.08481216430664, "learning_rate": 4.1356703237891357e-07, "loss": 0.5875, "step": 4383 }, { "epoch": 0.5865667647845866, "grad_norm": 25.36458396911621, "learning_rate": 4.134332352154134e-07, "loss": 0.3573, "step": 4384 }, { "epoch": 0.5867005619480867, "grad_norm": 58.16733169555664, "learning_rate": 4.1329943805191333e-07, "loss": 0.6481, "step": 4385 }, { "epoch": 0.5868343591115869, "grad_norm": 39.2602653503418, "learning_rate": 4.1316564088841315e-07, "loss": 0.5102, "step": 4386 }, { "epoch": 0.5869681562750869, "grad_norm": 10.153427124023438, "learning_rate": 4.1303184372491303e-07, "loss": 0.249, "step": 4387 }, { "epoch": 0.5871019534385871, "grad_norm": 22.4888858795166, "learning_rate": 4.1289804656141286e-07, "loss": 0.2838, "step": 4388 }, { "epoch": 0.5872357506020872, "grad_norm": 23.202880859375, "learning_rate": 4.127642493979128e-07, "loss": 0.4047, "step": 4389 }, { "epoch": 0.5873695477655874, "grad_norm": 33.534549713134766, "learning_rate": 4.126304522344126e-07, "loss": 0.6088, "step": 4390 }, { "epoch": 0.5875033449290875, "grad_norm": 25.334535598754883, "learning_rate": 4.1249665507091244e-07, "loss": 0.2973, "step": 4391 }, { "epoch": 0.5876371420925877, "grad_norm": 18.97001838684082, "learning_rate": 4.123628579074123e-07, "loss": 0.3649, "step": 4392 }, { "epoch": 0.5877709392560878, "grad_norm": 21.549041748046875, "learning_rate": 4.122290607439122e-07, "loss": 0.2041, "step": 4393 }, { "epoch": 0.587904736419588, "grad_norm": 24.08395767211914, "learning_rate": 4.120952635804121e-07, "loss": 0.5184, "step": 4394 }, { "epoch": 0.588038533583088, "grad_norm": 20.898929595947266, "learning_rate": 4.119614664169119e-07, "loss": 0.3659, "step": 4395 }, { "epoch": 0.5881723307465881, "grad_norm": 16.64963722229004, "learning_rate": 4.1182766925341184e-07, "loss": 0.2206, "step": 4396 }, { "epoch": 0.5883061279100883, "grad_norm": 20.733945846557617, "learning_rate": 4.1169387208991167e-07, "loss": 0.2335, "step": 4397 }, { "epoch": 0.5884399250735884, "grad_norm": 30.431379318237305, "learning_rate": 4.1156007492641155e-07, "loss": 0.5004, "step": 4398 }, { "epoch": 0.5885737222370886, "grad_norm": 25.910476684570312, "learning_rate": 4.1142627776291137e-07, "loss": 0.4709, "step": 4399 }, { "epoch": 0.5887075194005887, "grad_norm": 13.64357852935791, "learning_rate": 4.112924805994113e-07, "loss": 0.3336, "step": 4400 }, { "epoch": 0.5888413165640889, "grad_norm": 38.46171569824219, "learning_rate": 4.1115868343591113e-07, "loss": 0.5258, "step": 4401 }, { "epoch": 0.5889751137275889, "grad_norm": 20.19017791748047, "learning_rate": 4.11024886272411e-07, "loss": 0.3929, "step": 4402 }, { "epoch": 0.5891089108910891, "grad_norm": 30.709074020385742, "learning_rate": 4.1089108910891084e-07, "loss": 0.3552, "step": 4403 }, { "epoch": 0.5892427080545892, "grad_norm": 18.304988861083984, "learning_rate": 4.1075729194541077e-07, "loss": 0.2395, "step": 4404 }, { "epoch": 0.5893765052180894, "grad_norm": 27.33718490600586, "learning_rate": 4.106234947819106e-07, "loss": 0.2205, "step": 4405 }, { "epoch": 0.5895103023815895, "grad_norm": 18.005596160888672, "learning_rate": 4.104896976184105e-07, "loss": 0.3452, "step": 4406 }, { "epoch": 0.5896440995450897, "grad_norm": 18.13420867919922, "learning_rate": 4.1035590045491035e-07, "loss": 0.28, "step": 4407 }, { "epoch": 0.5897778967085898, "grad_norm": 35.01900863647461, "learning_rate": 4.1022210329141023e-07, "loss": 0.4687, "step": 4408 }, { "epoch": 0.5899116938720899, "grad_norm": 55.8632698059082, "learning_rate": 4.1008830612791006e-07, "loss": 0.4223, "step": 4409 }, { "epoch": 0.59004549103559, "grad_norm": 30.617332458496094, "learning_rate": 4.0995450896440994e-07, "loss": 0.7325, "step": 4410 }, { "epoch": 0.5901792881990902, "grad_norm": 18.839073181152344, "learning_rate": 4.098207118009098e-07, "loss": 0.278, "step": 4411 }, { "epoch": 0.5903130853625903, "grad_norm": 26.512548446655273, "learning_rate": 4.096869146374097e-07, "loss": 0.3531, "step": 4412 }, { "epoch": 0.5904468825260905, "grad_norm": 24.80384635925293, "learning_rate": 4.095531174739095e-07, "loss": 0.384, "step": 4413 }, { "epoch": 0.5905806796895906, "grad_norm": 22.923120498657227, "learning_rate": 4.094193203104094e-07, "loss": 0.2942, "step": 4414 }, { "epoch": 0.5907144768530908, "grad_norm": 48.6689567565918, "learning_rate": 4.092855231469093e-07, "loss": 0.5406, "step": 4415 }, { "epoch": 0.5908482740165909, "grad_norm": 21.462589263916016, "learning_rate": 4.0915172598340916e-07, "loss": 0.5786, "step": 4416 }, { "epoch": 0.5909820711800909, "grad_norm": 38.876651763916016, "learning_rate": 4.09017928819909e-07, "loss": 0.4963, "step": 4417 }, { "epoch": 0.5911158683435911, "grad_norm": 27.632343292236328, "learning_rate": 4.0888413165640887e-07, "loss": 0.4486, "step": 4418 }, { "epoch": 0.5912496655070912, "grad_norm": 17.664249420166016, "learning_rate": 4.0875033449290875e-07, "loss": 0.251, "step": 4419 }, { "epoch": 0.5913834626705914, "grad_norm": 23.67853546142578, "learning_rate": 4.0861653732940857e-07, "loss": 0.4128, "step": 4420 }, { "epoch": 0.5915172598340915, "grad_norm": 32.95771408081055, "learning_rate": 4.0848274016590845e-07, "loss": 0.3011, "step": 4421 }, { "epoch": 0.5916510569975917, "grad_norm": 31.797544479370117, "learning_rate": 4.0834894300240833e-07, "loss": 0.4006, "step": 4422 }, { "epoch": 0.5917848541610918, "grad_norm": 31.15419578552246, "learning_rate": 4.082151458389082e-07, "loss": 0.2316, "step": 4423 }, { "epoch": 0.5919186513245919, "grad_norm": 33.39226150512695, "learning_rate": 4.0808134867540804e-07, "loss": 0.226, "step": 4424 }, { "epoch": 0.592052448488092, "grad_norm": 28.351608276367188, "learning_rate": 4.079475515119079e-07, "loss": 0.508, "step": 4425 }, { "epoch": 0.5921862456515922, "grad_norm": 29.334331512451172, "learning_rate": 4.078137543484078e-07, "loss": 0.3229, "step": 4426 }, { "epoch": 0.5923200428150923, "grad_norm": 23.278392791748047, "learning_rate": 4.076799571849077e-07, "loss": 0.3711, "step": 4427 }, { "epoch": 0.5924538399785925, "grad_norm": 38.78950500488281, "learning_rate": 4.075461600214075e-07, "loss": 0.7773, "step": 4428 }, { "epoch": 0.5925876371420926, "grad_norm": 38.68907165527344, "learning_rate": 4.0741236285790743e-07, "loss": 0.3325, "step": 4429 }, { "epoch": 0.5927214343055928, "grad_norm": 32.8158073425293, "learning_rate": 4.0727856569440726e-07, "loss": 0.4004, "step": 4430 }, { "epoch": 0.5928552314690928, "grad_norm": 20.379053115844727, "learning_rate": 4.0714476853090714e-07, "loss": 0.331, "step": 4431 }, { "epoch": 0.592989028632593, "grad_norm": 35.48988723754883, "learning_rate": 4.0701097136740696e-07, "loss": 0.4663, "step": 4432 }, { "epoch": 0.5931228257960931, "grad_norm": 25.326824188232422, "learning_rate": 4.068771742039069e-07, "loss": 0.4541, "step": 4433 }, { "epoch": 0.5932566229595932, "grad_norm": 27.59333610534668, "learning_rate": 4.067433770404067e-07, "loss": 0.407, "step": 4434 }, { "epoch": 0.5933904201230934, "grad_norm": 21.725509643554688, "learning_rate": 4.066095798769066e-07, "loss": 0.2055, "step": 4435 }, { "epoch": 0.5935242172865935, "grad_norm": 22.988632202148438, "learning_rate": 4.0647578271340643e-07, "loss": 0.4233, "step": 4436 }, { "epoch": 0.5936580144500937, "grad_norm": 14.926629066467285, "learning_rate": 4.0634198554990636e-07, "loss": 0.2091, "step": 4437 }, { "epoch": 0.5937918116135938, "grad_norm": 30.263410568237305, "learning_rate": 4.062081883864062e-07, "loss": 0.5246, "step": 4438 }, { "epoch": 0.5939256087770939, "grad_norm": 18.102527618408203, "learning_rate": 4.0607439122290607e-07, "loss": 0.334, "step": 4439 }, { "epoch": 0.594059405940594, "grad_norm": 30.71893882751465, "learning_rate": 4.0594059405940595e-07, "loss": 0.3149, "step": 4440 }, { "epoch": 0.5941932031040942, "grad_norm": 37.461360931396484, "learning_rate": 4.058067968959058e-07, "loss": 0.4568, "step": 4441 }, { "epoch": 0.5943270002675943, "grad_norm": 16.477676391601562, "learning_rate": 4.0567299973240565e-07, "loss": 0.2122, "step": 4442 }, { "epoch": 0.5944607974310945, "grad_norm": 21.96830177307129, "learning_rate": 4.055392025689055e-07, "loss": 0.2288, "step": 4443 }, { "epoch": 0.5945945945945946, "grad_norm": 72.02178192138672, "learning_rate": 4.054054054054054e-07, "loss": 0.976, "step": 4444 }, { "epoch": 0.5947283917580948, "grad_norm": 30.718751907348633, "learning_rate": 4.0527160824190524e-07, "loss": 0.4125, "step": 4445 }, { "epoch": 0.5948621889215948, "grad_norm": 27.69855308532715, "learning_rate": 4.051378110784051e-07, "loss": 0.3977, "step": 4446 }, { "epoch": 0.594995986085095, "grad_norm": 24.400449752807617, "learning_rate": 4.0500401391490494e-07, "loss": 0.4616, "step": 4447 }, { "epoch": 0.5951297832485951, "grad_norm": 36.923912048339844, "learning_rate": 4.048702167514049e-07, "loss": 0.5348, "step": 4448 }, { "epoch": 0.5952635804120953, "grad_norm": 29.606069564819336, "learning_rate": 4.047364195879047e-07, "loss": 0.4922, "step": 4449 }, { "epoch": 0.5953973775755954, "grad_norm": 47.42457962036133, "learning_rate": 4.046026224244046e-07, "loss": 0.5221, "step": 4450 }, { "epoch": 0.5955311747390956, "grad_norm": 32.04573440551758, "learning_rate": 4.0446882526090446e-07, "loss": 0.3231, "step": 4451 }, { "epoch": 0.5956649719025957, "grad_norm": 32.88508224487305, "learning_rate": 4.0433502809740434e-07, "loss": 0.6049, "step": 4452 }, { "epoch": 0.5957987690660957, "grad_norm": 33.05910873413086, "learning_rate": 4.0420123093390416e-07, "loss": 0.527, "step": 4453 }, { "epoch": 0.5959325662295959, "grad_norm": 17.496315002441406, "learning_rate": 4.0406743377040404e-07, "loss": 0.2133, "step": 4454 }, { "epoch": 0.596066363393096, "grad_norm": 38.18158721923828, "learning_rate": 4.039336366069039e-07, "loss": 0.4112, "step": 4455 }, { "epoch": 0.5962001605565962, "grad_norm": 23.16773223876953, "learning_rate": 4.037998394434038e-07, "loss": 0.4508, "step": 4456 }, { "epoch": 0.5963339577200963, "grad_norm": 27.99581527709961, "learning_rate": 4.0366604227990363e-07, "loss": 0.5419, "step": 4457 }, { "epoch": 0.5964677548835965, "grad_norm": 19.007949829101562, "learning_rate": 4.035322451164035e-07, "loss": 0.3503, "step": 4458 }, { "epoch": 0.5966015520470966, "grad_norm": 14.582478523254395, "learning_rate": 4.033984479529034e-07, "loss": 0.3224, "step": 4459 }, { "epoch": 0.5967353492105968, "grad_norm": 18.663707733154297, "learning_rate": 4.0326465078940327e-07, "loss": 0.4636, "step": 4460 }, { "epoch": 0.5968691463740968, "grad_norm": 22.951780319213867, "learning_rate": 4.031308536259031e-07, "loss": 0.5115, "step": 4461 }, { "epoch": 0.597002943537597, "grad_norm": 36.879791259765625, "learning_rate": 4.0299705646240297e-07, "loss": 0.4459, "step": 4462 }, { "epoch": 0.5971367407010971, "grad_norm": 27.2891902923584, "learning_rate": 4.0286325929890285e-07, "loss": 0.3687, "step": 4463 }, { "epoch": 0.5972705378645973, "grad_norm": 26.958044052124023, "learning_rate": 4.0272946213540273e-07, "loss": 0.5168, "step": 4464 }, { "epoch": 0.5974043350280974, "grad_norm": 16.69524574279785, "learning_rate": 4.0259566497190256e-07, "loss": 0.2117, "step": 4465 }, { "epoch": 0.5975381321915976, "grad_norm": 28.598594665527344, "learning_rate": 4.024618678084025e-07, "loss": 0.4524, "step": 4466 }, { "epoch": 0.5976719293550977, "grad_norm": 23.250823974609375, "learning_rate": 4.023280706449023e-07, "loss": 0.2302, "step": 4467 }, { "epoch": 0.5978057265185978, "grad_norm": 29.859060287475586, "learning_rate": 4.021942734814022e-07, "loss": 0.4448, "step": 4468 }, { "epoch": 0.5979395236820979, "grad_norm": 29.571063995361328, "learning_rate": 4.02060476317902e-07, "loss": 0.449, "step": 4469 }, { "epoch": 0.598073320845598, "grad_norm": 25.48114013671875, "learning_rate": 4.019266791544019e-07, "loss": 0.4432, "step": 4470 }, { "epoch": 0.5982071180090982, "grad_norm": 28.247100830078125, "learning_rate": 4.017928819909018e-07, "loss": 0.4075, "step": 4471 }, { "epoch": 0.5983409151725984, "grad_norm": 30.413129806518555, "learning_rate": 4.016590848274016e-07, "loss": 0.4359, "step": 4472 }, { "epoch": 0.5984747123360985, "grad_norm": 40.10308837890625, "learning_rate": 4.015252876639015e-07, "loss": 0.4917, "step": 4473 }, { "epoch": 0.5986085094995987, "grad_norm": 21.262714385986328, "learning_rate": 4.0139149050040136e-07, "loss": 0.3486, "step": 4474 }, { "epoch": 0.5987423066630987, "grad_norm": 25.435211181640625, "learning_rate": 4.0125769333690124e-07, "loss": 0.3554, "step": 4475 }, { "epoch": 0.5988761038265988, "grad_norm": 35.35029983520508, "learning_rate": 4.0112389617340107e-07, "loss": 0.501, "step": 4476 }, { "epoch": 0.599009900990099, "grad_norm": 22.82924461364746, "learning_rate": 4.00990099009901e-07, "loss": 0.4999, "step": 4477 }, { "epoch": 0.5991436981535991, "grad_norm": 26.373781204223633, "learning_rate": 4.0085630184640083e-07, "loss": 0.6227, "step": 4478 }, { "epoch": 0.5992774953170993, "grad_norm": 20.936159133911133, "learning_rate": 4.007225046829007e-07, "loss": 0.4831, "step": 4479 }, { "epoch": 0.5994112924805994, "grad_norm": 32.094783782958984, "learning_rate": 4.0058870751940053e-07, "loss": 0.5462, "step": 4480 }, { "epoch": 0.5995450896440996, "grad_norm": 16.36827278137207, "learning_rate": 4.0045491035590047e-07, "loss": 0.4258, "step": 4481 }, { "epoch": 0.5996788868075997, "grad_norm": 23.75251579284668, "learning_rate": 4.003211131924003e-07, "loss": 0.2835, "step": 4482 }, { "epoch": 0.5998126839710998, "grad_norm": 20.285982131958008, "learning_rate": 4.0018731602890017e-07, "loss": 0.3475, "step": 4483 }, { "epoch": 0.5999464811345999, "grad_norm": 20.420541763305664, "learning_rate": 4.000535188654e-07, "loss": 0.4211, "step": 4484 }, { "epoch": 0.6000802782981001, "grad_norm": 23.35195541381836, "learning_rate": 3.9991972170189993e-07, "loss": 0.4659, "step": 4485 }, { "epoch": 0.6002140754616002, "grad_norm": 36.48163986206055, "learning_rate": 3.9978592453839976e-07, "loss": 0.5664, "step": 4486 }, { "epoch": 0.6003478726251004, "grad_norm": 21.59243392944336, "learning_rate": 3.9965212737489964e-07, "loss": 0.3067, "step": 4487 }, { "epoch": 0.6004816697886005, "grad_norm": 30.348541259765625, "learning_rate": 3.995183302113995e-07, "loss": 0.3593, "step": 4488 }, { "epoch": 0.6006154669521007, "grad_norm": 14.40955638885498, "learning_rate": 3.993845330478994e-07, "loss": 0.3173, "step": 4489 }, { "epoch": 0.6007492641156007, "grad_norm": 23.7658634185791, "learning_rate": 3.992507358843992e-07, "loss": 0.3232, "step": 4490 }, { "epoch": 0.6008830612791008, "grad_norm": 37.964420318603516, "learning_rate": 3.991169387208991e-07, "loss": 0.3659, "step": 4491 }, { "epoch": 0.601016858442601, "grad_norm": 24.08111000061035, "learning_rate": 3.98983141557399e-07, "loss": 0.3757, "step": 4492 }, { "epoch": 0.6011506556061011, "grad_norm": 17.59868049621582, "learning_rate": 3.9884934439389886e-07, "loss": 0.2963, "step": 4493 }, { "epoch": 0.6012844527696013, "grad_norm": 21.618288040161133, "learning_rate": 3.987155472303987e-07, "loss": 0.4416, "step": 4494 }, { "epoch": 0.6014182499331014, "grad_norm": 18.457801818847656, "learning_rate": 3.985817500668985e-07, "loss": 0.3162, "step": 4495 }, { "epoch": 0.6015520470966016, "grad_norm": 37.36551284790039, "learning_rate": 3.9844795290339844e-07, "loss": 0.6493, "step": 4496 }, { "epoch": 0.6016858442601016, "grad_norm": 50.83626937866211, "learning_rate": 3.9831415573989827e-07, "loss": 0.6507, "step": 4497 }, { "epoch": 0.6018196414236018, "grad_norm": 45.31346893310547, "learning_rate": 3.9818035857639815e-07, "loss": 0.6316, "step": 4498 }, { "epoch": 0.6019534385871019, "grad_norm": 26.287694931030273, "learning_rate": 3.9804656141289803e-07, "loss": 0.414, "step": 4499 }, { "epoch": 0.6020872357506021, "grad_norm": 36.981224060058594, "learning_rate": 3.979127642493979e-07, "loss": 0.4861, "step": 4500 }, { "epoch": 0.6022210329141022, "grad_norm": 19.38559913635254, "learning_rate": 3.9777896708589773e-07, "loss": 0.2144, "step": 4501 }, { "epoch": 0.6023548300776024, "grad_norm": 23.681020736694336, "learning_rate": 3.976451699223976e-07, "loss": 0.3939, "step": 4502 }, { "epoch": 0.6024886272411025, "grad_norm": 21.246417999267578, "learning_rate": 3.975113727588975e-07, "loss": 0.2987, "step": 4503 }, { "epoch": 0.6026224244046027, "grad_norm": 30.79447364807129, "learning_rate": 3.9737757559539737e-07, "loss": 0.4001, "step": 4504 }, { "epoch": 0.6027562215681027, "grad_norm": 19.76508903503418, "learning_rate": 3.972437784318972e-07, "loss": 0.3919, "step": 4505 }, { "epoch": 0.6028900187316029, "grad_norm": 18.57863998413086, "learning_rate": 3.971099812683971e-07, "loss": 0.4023, "step": 4506 }, { "epoch": 0.603023815895103, "grad_norm": 27.221405029296875, "learning_rate": 3.9697618410489696e-07, "loss": 0.4129, "step": 4507 }, { "epoch": 0.6031576130586032, "grad_norm": 34.301605224609375, "learning_rate": 3.9684238694139684e-07, "loss": 0.5157, "step": 4508 }, { "epoch": 0.6032914102221033, "grad_norm": 22.840229034423828, "learning_rate": 3.9670858977789666e-07, "loss": 0.3636, "step": 4509 }, { "epoch": 0.6034252073856035, "grad_norm": 40.70695495605469, "learning_rate": 3.965747926143966e-07, "loss": 0.5935, "step": 4510 }, { "epoch": 0.6035590045491036, "grad_norm": 29.689208984375, "learning_rate": 3.964409954508964e-07, "loss": 0.4397, "step": 4511 }, { "epoch": 0.6036928017126036, "grad_norm": 34.28284454345703, "learning_rate": 3.963071982873963e-07, "loss": 0.5829, "step": 4512 }, { "epoch": 0.6038265988761038, "grad_norm": 25.773508071899414, "learning_rate": 3.9617340112389613e-07, "loss": 0.3955, "step": 4513 }, { "epoch": 0.6039603960396039, "grad_norm": 50.0213508605957, "learning_rate": 3.9603960396039606e-07, "loss": 0.3955, "step": 4514 }, { "epoch": 0.6040941932031041, "grad_norm": 57.970909118652344, "learning_rate": 3.959058067968959e-07, "loss": 0.397, "step": 4515 }, { "epoch": 0.6042279903666042, "grad_norm": 26.39674949645996, "learning_rate": 3.9577200963339576e-07, "loss": 0.2947, "step": 4516 }, { "epoch": 0.6043617875301044, "grad_norm": 26.54131507873535, "learning_rate": 3.956382124698956e-07, "loss": 0.2989, "step": 4517 }, { "epoch": 0.6044955846936045, "grad_norm": 40.79667663574219, "learning_rate": 3.955044153063955e-07, "loss": 0.44, "step": 4518 }, { "epoch": 0.6046293818571046, "grad_norm": 51.95839309692383, "learning_rate": 3.9537061814289535e-07, "loss": 0.395, "step": 4519 }, { "epoch": 0.6047631790206047, "grad_norm": 26.63762855529785, "learning_rate": 3.9523682097939523e-07, "loss": 0.4814, "step": 4520 }, { "epoch": 0.6048969761841049, "grad_norm": 30.612388610839844, "learning_rate": 3.951030238158951e-07, "loss": 0.3852, "step": 4521 }, { "epoch": 0.605030773347605, "grad_norm": 39.314208984375, "learning_rate": 3.9496922665239493e-07, "loss": 0.3628, "step": 4522 }, { "epoch": 0.6051645705111052, "grad_norm": 23.586660385131836, "learning_rate": 3.948354294888948e-07, "loss": 0.3644, "step": 4523 }, { "epoch": 0.6052983676746053, "grad_norm": 12.98852252960205, "learning_rate": 3.9470163232539464e-07, "loss": 0.1365, "step": 4524 }, { "epoch": 0.6054321648381055, "grad_norm": 39.259788513183594, "learning_rate": 3.9456783516189457e-07, "loss": 0.4658, "step": 4525 }, { "epoch": 0.6055659620016056, "grad_norm": 63.04983139038086, "learning_rate": 3.944340379983944e-07, "loss": 0.6048, "step": 4526 }, { "epoch": 0.6056997591651057, "grad_norm": 28.31089210510254, "learning_rate": 3.943002408348943e-07, "loss": 0.4105, "step": 4527 }, { "epoch": 0.6058335563286058, "grad_norm": 29.259851455688477, "learning_rate": 3.941664436713941e-07, "loss": 0.5769, "step": 4528 }, { "epoch": 0.605967353492106, "grad_norm": 27.92089080810547, "learning_rate": 3.9403264650789404e-07, "loss": 0.3823, "step": 4529 }, { "epoch": 0.6061011506556061, "grad_norm": 25.642820358276367, "learning_rate": 3.9389884934439386e-07, "loss": 0.3738, "step": 4530 }, { "epoch": 0.6062349478191063, "grad_norm": 30.61127471923828, "learning_rate": 3.9376505218089374e-07, "loss": 0.5096, "step": 4531 }, { "epoch": 0.6063687449826064, "grad_norm": 42.718753814697266, "learning_rate": 3.936312550173936e-07, "loss": 0.5091, "step": 4532 }, { "epoch": 0.6065025421461065, "grad_norm": 24.567237854003906, "learning_rate": 3.934974578538935e-07, "loss": 0.3592, "step": 4533 }, { "epoch": 0.6066363393096066, "grad_norm": 32.077571868896484, "learning_rate": 3.9336366069039333e-07, "loss": 0.5438, "step": 4534 }, { "epoch": 0.6067701364731067, "grad_norm": 41.726341247558594, "learning_rate": 3.932298635268932e-07, "loss": 0.6307, "step": 4535 }, { "epoch": 0.6069039336366069, "grad_norm": 41.79242706298828, "learning_rate": 3.930960663633931e-07, "loss": 0.5701, "step": 4536 }, { "epoch": 0.607037730800107, "grad_norm": 24.916919708251953, "learning_rate": 3.9296226919989296e-07, "loss": 0.4075, "step": 4537 }, { "epoch": 0.6071715279636072, "grad_norm": 31.371986389160156, "learning_rate": 3.928284720363928e-07, "loss": 0.3488, "step": 4538 }, { "epoch": 0.6073053251271073, "grad_norm": 27.848665237426758, "learning_rate": 3.9269467487289267e-07, "loss": 0.3712, "step": 4539 }, { "epoch": 0.6074391222906075, "grad_norm": 36.6080322265625, "learning_rate": 3.9256087770939255e-07, "loss": 0.3848, "step": 4540 }, { "epoch": 0.6075729194541075, "grad_norm": 33.29654312133789, "learning_rate": 3.9242708054589243e-07, "loss": 0.3878, "step": 4541 }, { "epoch": 0.6077067166176077, "grad_norm": 24.978160858154297, "learning_rate": 3.9229328338239225e-07, "loss": 0.492, "step": 4542 }, { "epoch": 0.6078405137811078, "grad_norm": 18.27268409729004, "learning_rate": 3.921594862188922e-07, "loss": 0.3894, "step": 4543 }, { "epoch": 0.607974310944608, "grad_norm": 38.390316009521484, "learning_rate": 3.92025689055392e-07, "loss": 0.4148, "step": 4544 }, { "epoch": 0.6081081081081081, "grad_norm": 14.89281940460205, "learning_rate": 3.918918918918919e-07, "loss": 0.3179, "step": 4545 }, { "epoch": 0.6082419052716083, "grad_norm": 27.228715896606445, "learning_rate": 3.917580947283917e-07, "loss": 0.5387, "step": 4546 }, { "epoch": 0.6083757024351084, "grad_norm": 30.86823844909668, "learning_rate": 3.9162429756489165e-07, "loss": 0.4677, "step": 4547 }, { "epoch": 0.6085094995986086, "grad_norm": 23.080429077148438, "learning_rate": 3.914905004013915e-07, "loss": 0.6173, "step": 4548 }, { "epoch": 0.6086432967621086, "grad_norm": 40.004241943359375, "learning_rate": 3.913567032378913e-07, "loss": 0.4121, "step": 4549 }, { "epoch": 0.6087770939256087, "grad_norm": 28.290285110473633, "learning_rate": 3.912229060743912e-07, "loss": 0.3806, "step": 4550 }, { "epoch": 0.6089108910891089, "grad_norm": 40.04389953613281, "learning_rate": 3.9108910891089106e-07, "loss": 0.4805, "step": 4551 }, { "epoch": 0.609044688252609, "grad_norm": 23.54449462890625, "learning_rate": 3.9095531174739094e-07, "loss": 0.2972, "step": 4552 }, { "epoch": 0.6091784854161092, "grad_norm": 28.758039474487305, "learning_rate": 3.9082151458389077e-07, "loss": 0.2925, "step": 4553 }, { "epoch": 0.6093122825796093, "grad_norm": 20.82735824584961, "learning_rate": 3.906877174203907e-07, "loss": 0.3326, "step": 4554 }, { "epoch": 0.6094460797431095, "grad_norm": 21.963842391967773, "learning_rate": 3.9055392025689053e-07, "loss": 0.2694, "step": 4555 }, { "epoch": 0.6095798769066095, "grad_norm": 31.230152130126953, "learning_rate": 3.904201230933904e-07, "loss": 0.4721, "step": 4556 }, { "epoch": 0.6097136740701097, "grad_norm": 32.12763595581055, "learning_rate": 3.9028632592989023e-07, "loss": 0.4112, "step": 4557 }, { "epoch": 0.6098474712336098, "grad_norm": 30.098888397216797, "learning_rate": 3.9015252876639016e-07, "loss": 0.2886, "step": 4558 }, { "epoch": 0.60998126839711, "grad_norm": 29.652578353881836, "learning_rate": 3.9001873160289e-07, "loss": 0.5698, "step": 4559 }, { "epoch": 0.6101150655606101, "grad_norm": 28.101097106933594, "learning_rate": 3.8988493443938987e-07, "loss": 0.2582, "step": 4560 }, { "epoch": 0.6102488627241103, "grad_norm": 27.88777732849121, "learning_rate": 3.897511372758897e-07, "loss": 0.5072, "step": 4561 }, { "epoch": 0.6103826598876104, "grad_norm": 42.37628936767578, "learning_rate": 3.8961734011238963e-07, "loss": 0.4718, "step": 4562 }, { "epoch": 0.6105164570511105, "grad_norm": 40.98026657104492, "learning_rate": 3.8948354294888945e-07, "loss": 0.5434, "step": 4563 }, { "epoch": 0.6106502542146106, "grad_norm": 45.3387451171875, "learning_rate": 3.8934974578538933e-07, "loss": 0.8051, "step": 4564 }, { "epoch": 0.6107840513781108, "grad_norm": 17.938350677490234, "learning_rate": 3.892159486218892e-07, "loss": 0.3281, "step": 4565 }, { "epoch": 0.6109178485416109, "grad_norm": 44.923526763916016, "learning_rate": 3.890821514583891e-07, "loss": 0.7248, "step": 4566 }, { "epoch": 0.6110516457051111, "grad_norm": 35.71216583251953, "learning_rate": 3.889483542948889e-07, "loss": 0.503, "step": 4567 }, { "epoch": 0.6111854428686112, "grad_norm": 21.903718948364258, "learning_rate": 3.888145571313888e-07, "loss": 0.3863, "step": 4568 }, { "epoch": 0.6113192400321114, "grad_norm": 35.91646957397461, "learning_rate": 3.886807599678887e-07, "loss": 0.5611, "step": 4569 }, { "epoch": 0.6114530371956115, "grad_norm": 54.20323944091797, "learning_rate": 3.8854696280438856e-07, "loss": 0.711, "step": 4570 }, { "epoch": 0.6115868343591115, "grad_norm": 48.00283432006836, "learning_rate": 3.884131656408884e-07, "loss": 0.4591, "step": 4571 }, { "epoch": 0.6117206315226117, "grad_norm": 21.939029693603516, "learning_rate": 3.8827936847738826e-07, "loss": 0.3126, "step": 4572 }, { "epoch": 0.6118544286861118, "grad_norm": 29.650432586669922, "learning_rate": 3.8814557131388814e-07, "loss": 0.3474, "step": 4573 }, { "epoch": 0.611988225849612, "grad_norm": 33.97514343261719, "learning_rate": 3.8801177415038797e-07, "loss": 0.3019, "step": 4574 }, { "epoch": 0.6121220230131121, "grad_norm": 21.787599563598633, "learning_rate": 3.8787797698688785e-07, "loss": 0.4084, "step": 4575 }, { "epoch": 0.6122558201766123, "grad_norm": 22.740741729736328, "learning_rate": 3.8774417982338773e-07, "loss": 0.1989, "step": 4576 }, { "epoch": 0.6123896173401124, "grad_norm": 20.190921783447266, "learning_rate": 3.876103826598876e-07, "loss": 0.2951, "step": 4577 }, { "epoch": 0.6125234145036125, "grad_norm": 31.93113899230957, "learning_rate": 3.8747658549638743e-07, "loss": 0.5603, "step": 4578 }, { "epoch": 0.6126572116671126, "grad_norm": 36.645835876464844, "learning_rate": 3.873427883328873e-07, "loss": 0.4669, "step": 4579 }, { "epoch": 0.6127910088306128, "grad_norm": 43.47810363769531, "learning_rate": 3.872089911693872e-07, "loss": 0.4733, "step": 4580 }, { "epoch": 0.6129248059941129, "grad_norm": 44.75178146362305, "learning_rate": 3.8707519400588707e-07, "loss": 0.6689, "step": 4581 }, { "epoch": 0.6130586031576131, "grad_norm": 24.903717041015625, "learning_rate": 3.869413968423869e-07, "loss": 0.4547, "step": 4582 }, { "epoch": 0.6131924003211132, "grad_norm": 33.01123809814453, "learning_rate": 3.868075996788868e-07, "loss": 0.3414, "step": 4583 }, { "epoch": 0.6133261974846134, "grad_norm": 28.587844848632812, "learning_rate": 3.8667380251538665e-07, "loss": 0.3699, "step": 4584 }, { "epoch": 0.6134599946481134, "grad_norm": 40.17137145996094, "learning_rate": 3.8654000535188653e-07, "loss": 0.6116, "step": 4585 }, { "epoch": 0.6135937918116136, "grad_norm": 34.722713470458984, "learning_rate": 3.8640620818838636e-07, "loss": 0.4792, "step": 4586 }, { "epoch": 0.6137275889751137, "grad_norm": 34.926673889160156, "learning_rate": 3.862724110248863e-07, "loss": 0.6133, "step": 4587 }, { "epoch": 0.6138613861386139, "grad_norm": 41.577335357666016, "learning_rate": 3.861386138613861e-07, "loss": 0.3158, "step": 4588 }, { "epoch": 0.613995183302114, "grad_norm": 32.58517074584961, "learning_rate": 3.86004816697886e-07, "loss": 0.4432, "step": 4589 }, { "epoch": 0.6141289804656141, "grad_norm": 19.468469619750977, "learning_rate": 3.858710195343858e-07, "loss": 0.4752, "step": 4590 }, { "epoch": 0.6142627776291143, "grad_norm": 17.396928787231445, "learning_rate": 3.8573722237088576e-07, "loss": 0.3271, "step": 4591 }, { "epoch": 0.6143965747926144, "grad_norm": 26.39650535583496, "learning_rate": 3.856034252073856e-07, "loss": 0.2579, "step": 4592 }, { "epoch": 0.6145303719561145, "grad_norm": 30.773130416870117, "learning_rate": 3.8546962804388546e-07, "loss": 0.4748, "step": 4593 }, { "epoch": 0.6146641691196146, "grad_norm": 42.24557876586914, "learning_rate": 3.853358308803853e-07, "loss": 0.3957, "step": 4594 }, { "epoch": 0.6147979662831148, "grad_norm": 21.94380760192871, "learning_rate": 3.852020337168852e-07, "loss": 0.5161, "step": 4595 }, { "epoch": 0.6149317634466149, "grad_norm": 39.710323333740234, "learning_rate": 3.8506823655338505e-07, "loss": 0.5803, "step": 4596 }, { "epoch": 0.6150655606101151, "grad_norm": 23.06785774230957, "learning_rate": 3.8493443938988493e-07, "loss": 0.3915, "step": 4597 }, { "epoch": 0.6151993577736152, "grad_norm": 18.666915893554688, "learning_rate": 3.848006422263848e-07, "loss": 0.3263, "step": 4598 }, { "epoch": 0.6153331549371154, "grad_norm": 28.243898391723633, "learning_rate": 3.846668450628847e-07, "loss": 0.4721, "step": 4599 }, { "epoch": 0.6154669521006154, "grad_norm": 32.031333923339844, "learning_rate": 3.845330478993845e-07, "loss": 0.3596, "step": 4600 }, { "epoch": 0.6156007492641156, "grad_norm": 39.220703125, "learning_rate": 3.8439925073588434e-07, "loss": 0.5329, "step": 4601 }, { "epoch": 0.6157345464276157, "grad_norm": 36.49979782104492, "learning_rate": 3.8426545357238427e-07, "loss": 0.3584, "step": 4602 }, { "epoch": 0.6158683435911159, "grad_norm": 29.653972625732422, "learning_rate": 3.841316564088841e-07, "loss": 0.455, "step": 4603 }, { "epoch": 0.616002140754616, "grad_norm": 30.247535705566406, "learning_rate": 3.83997859245384e-07, "loss": 0.3815, "step": 4604 }, { "epoch": 0.6161359379181162, "grad_norm": 17.046344757080078, "learning_rate": 3.838640620818838e-07, "loss": 0.2699, "step": 4605 }, { "epoch": 0.6162697350816163, "grad_norm": 42.10260009765625, "learning_rate": 3.8373026491838373e-07, "loss": 0.4687, "step": 4606 }, { "epoch": 0.6164035322451165, "grad_norm": 48.48081970214844, "learning_rate": 3.8359646775488356e-07, "loss": 0.6372, "step": 4607 }, { "epoch": 0.6165373294086165, "grad_norm": 15.76856517791748, "learning_rate": 3.8346267059138344e-07, "loss": 0.3447, "step": 4608 }, { "epoch": 0.6166711265721166, "grad_norm": 13.155765533447266, "learning_rate": 3.833288734278833e-07, "loss": 0.1692, "step": 4609 }, { "epoch": 0.6168049237356168, "grad_norm": 30.905258178710938, "learning_rate": 3.831950762643832e-07, "loss": 0.3203, "step": 4610 }, { "epoch": 0.6169387208991169, "grad_norm": 30.408742904663086, "learning_rate": 3.83061279100883e-07, "loss": 0.3586, "step": 4611 }, { "epoch": 0.6170725180626171, "grad_norm": 21.76311683654785, "learning_rate": 3.829274819373829e-07, "loss": 0.5155, "step": 4612 }, { "epoch": 0.6172063152261172, "grad_norm": 18.514469146728516, "learning_rate": 3.827936847738828e-07, "loss": 0.3086, "step": 4613 }, { "epoch": 0.6173401123896174, "grad_norm": 16.84125328063965, "learning_rate": 3.8265988761038266e-07, "loss": 0.4459, "step": 4614 }, { "epoch": 0.6174739095531174, "grad_norm": 17.71497344970703, "learning_rate": 3.825260904468825e-07, "loss": 0.3326, "step": 4615 }, { "epoch": 0.6176077067166176, "grad_norm": 36.51194763183594, "learning_rate": 3.8239229328338237e-07, "loss": 0.4157, "step": 4616 }, { "epoch": 0.6177415038801177, "grad_norm": 25.16452407836914, "learning_rate": 3.8225849611988225e-07, "loss": 0.4252, "step": 4617 }, { "epoch": 0.6178753010436179, "grad_norm": 25.00906753540039, "learning_rate": 3.8212469895638213e-07, "loss": 0.4879, "step": 4618 }, { "epoch": 0.618009098207118, "grad_norm": 51.22432327270508, "learning_rate": 3.8199090179288195e-07, "loss": 0.7524, "step": 4619 }, { "epoch": 0.6181428953706182, "grad_norm": 20.931346893310547, "learning_rate": 3.818571046293819e-07, "loss": 0.4409, "step": 4620 }, { "epoch": 0.6182766925341183, "grad_norm": 29.125017166137695, "learning_rate": 3.817233074658817e-07, "loss": 0.51, "step": 4621 }, { "epoch": 0.6184104896976184, "grad_norm": 24.263792037963867, "learning_rate": 3.815895103023816e-07, "loss": 0.2304, "step": 4622 }, { "epoch": 0.6185442868611185, "grad_norm": 45.19330596923828, "learning_rate": 3.814557131388814e-07, "loss": 0.4238, "step": 4623 }, { "epoch": 0.6186780840246187, "grad_norm": 26.9657039642334, "learning_rate": 3.8132191597538135e-07, "loss": 0.5204, "step": 4624 }, { "epoch": 0.6188118811881188, "grad_norm": 25.72755241394043, "learning_rate": 3.811881188118812e-07, "loss": 0.3399, "step": 4625 }, { "epoch": 0.618945678351619, "grad_norm": 42.93172836303711, "learning_rate": 3.81054321648381e-07, "loss": 0.5163, "step": 4626 }, { "epoch": 0.6190794755151191, "grad_norm": 28.564964294433594, "learning_rate": 3.809205244848809e-07, "loss": 0.494, "step": 4627 }, { "epoch": 0.6192132726786193, "grad_norm": 28.262161254882812, "learning_rate": 3.8078672732138076e-07, "loss": 0.3949, "step": 4628 }, { "epoch": 0.6193470698421194, "grad_norm": 17.63142204284668, "learning_rate": 3.8065293015788064e-07, "loss": 0.2044, "step": 4629 }, { "epoch": 0.6194808670056194, "grad_norm": 36.00946044921875, "learning_rate": 3.8051913299438047e-07, "loss": 0.4311, "step": 4630 }, { "epoch": 0.6196146641691196, "grad_norm": 30.61284828186035, "learning_rate": 3.803853358308804e-07, "loss": 0.3908, "step": 4631 }, { "epoch": 0.6197484613326197, "grad_norm": 31.58426856994629, "learning_rate": 3.802515386673802e-07, "loss": 0.3579, "step": 4632 }, { "epoch": 0.6198822584961199, "grad_norm": 27.478931427001953, "learning_rate": 3.801177415038801e-07, "loss": 0.4226, "step": 4633 }, { "epoch": 0.62001605565962, "grad_norm": 24.293041229248047, "learning_rate": 3.7998394434037993e-07, "loss": 0.4515, "step": 4634 }, { "epoch": 0.6201498528231202, "grad_norm": 18.860233306884766, "learning_rate": 3.7985014717687986e-07, "loss": 0.3767, "step": 4635 }, { "epoch": 0.6202836499866203, "grad_norm": 17.225309371948242, "learning_rate": 3.797163500133797e-07, "loss": 0.1923, "step": 4636 }, { "epoch": 0.6204174471501204, "grad_norm": 21.235280990600586, "learning_rate": 3.7958255284987957e-07, "loss": 0.3417, "step": 4637 }, { "epoch": 0.6205512443136205, "grad_norm": 35.220027923583984, "learning_rate": 3.794487556863794e-07, "loss": 0.488, "step": 4638 }, { "epoch": 0.6206850414771207, "grad_norm": 51.23119354248047, "learning_rate": 3.7931495852287933e-07, "loss": 0.4718, "step": 4639 }, { "epoch": 0.6208188386406208, "grad_norm": 37.1909065246582, "learning_rate": 3.7918116135937915e-07, "loss": 0.4097, "step": 4640 }, { "epoch": 0.620952635804121, "grad_norm": 23.521535873413086, "learning_rate": 3.7904736419587903e-07, "loss": 0.3156, "step": 4641 }, { "epoch": 0.6210864329676211, "grad_norm": 27.557758331298828, "learning_rate": 3.789135670323789e-07, "loss": 0.4163, "step": 4642 }, { "epoch": 0.6212202301311213, "grad_norm": 53.59572982788086, "learning_rate": 3.787797698688788e-07, "loss": 0.6676, "step": 4643 }, { "epoch": 0.6213540272946213, "grad_norm": 18.863508224487305, "learning_rate": 3.786459727053786e-07, "loss": 0.1729, "step": 4644 }, { "epoch": 0.6214878244581215, "grad_norm": 29.373794555664062, "learning_rate": 3.785121755418785e-07, "loss": 0.3223, "step": 4645 }, { "epoch": 0.6216216216216216, "grad_norm": 38.5555305480957, "learning_rate": 3.783783783783784e-07, "loss": 0.5271, "step": 4646 }, { "epoch": 0.6217554187851217, "grad_norm": 12.275238990783691, "learning_rate": 3.7824458121487825e-07, "loss": 0.1399, "step": 4647 }, { "epoch": 0.6218892159486219, "grad_norm": 33.981781005859375, "learning_rate": 3.781107840513781e-07, "loss": 0.4034, "step": 4648 }, { "epoch": 0.622023013112122, "grad_norm": 40.794925689697266, "learning_rate": 3.7797698688787796e-07, "loss": 0.3685, "step": 4649 }, { "epoch": 0.6221568102756222, "grad_norm": 29.435882568359375, "learning_rate": 3.7784318972437784e-07, "loss": 0.3963, "step": 4650 }, { "epoch": 0.6222906074391223, "grad_norm": 31.260103225708008, "learning_rate": 3.777093925608777e-07, "loss": 0.3538, "step": 4651 }, { "epoch": 0.6224244046026224, "grad_norm": 19.296911239624023, "learning_rate": 3.7757559539737755e-07, "loss": 0.3337, "step": 4652 }, { "epoch": 0.6225582017661225, "grad_norm": 25.168603897094727, "learning_rate": 3.774417982338774e-07, "loss": 0.3301, "step": 4653 }, { "epoch": 0.6226919989296227, "grad_norm": 40.78619384765625, "learning_rate": 3.773080010703773e-07, "loss": 0.5668, "step": 4654 }, { "epoch": 0.6228257960931228, "grad_norm": 27.20401954650879, "learning_rate": 3.7717420390687713e-07, "loss": 0.4238, "step": 4655 }, { "epoch": 0.622959593256623, "grad_norm": 22.116065979003906, "learning_rate": 3.77040406743377e-07, "loss": 0.47, "step": 4656 }, { "epoch": 0.6230933904201231, "grad_norm": 30.856815338134766, "learning_rate": 3.769066095798769e-07, "loss": 0.5222, "step": 4657 }, { "epoch": 0.6232271875836233, "grad_norm": 17.36068344116211, "learning_rate": 3.7677281241637677e-07, "loss": 0.2905, "step": 4658 }, { "epoch": 0.6233609847471233, "grad_norm": 37.55425262451172, "learning_rate": 3.766390152528766e-07, "loss": 0.6027, "step": 4659 }, { "epoch": 0.6234947819106235, "grad_norm": 18.795461654663086, "learning_rate": 3.765052180893765e-07, "loss": 0.1591, "step": 4660 }, { "epoch": 0.6236285790741236, "grad_norm": 29.40082550048828, "learning_rate": 3.7637142092587635e-07, "loss": 0.164, "step": 4661 }, { "epoch": 0.6237623762376238, "grad_norm": 18.383014678955078, "learning_rate": 3.7623762376237623e-07, "loss": 0.2654, "step": 4662 }, { "epoch": 0.6238961734011239, "grad_norm": 33.14194869995117, "learning_rate": 3.7610382659887606e-07, "loss": 0.4117, "step": 4663 }, { "epoch": 0.6240299705646241, "grad_norm": 32.02007293701172, "learning_rate": 3.75970029435376e-07, "loss": 0.2323, "step": 4664 }, { "epoch": 0.6241637677281242, "grad_norm": 32.48596954345703, "learning_rate": 3.758362322718758e-07, "loss": 0.4551, "step": 4665 }, { "epoch": 0.6242975648916242, "grad_norm": 32.85911178588867, "learning_rate": 3.757024351083757e-07, "loss": 0.296, "step": 4666 }, { "epoch": 0.6244313620551244, "grad_norm": 27.132461547851562, "learning_rate": 3.755686379448755e-07, "loss": 0.4494, "step": 4667 }, { "epoch": 0.6245651592186245, "grad_norm": 32.077518463134766, "learning_rate": 3.7543484078137545e-07, "loss": 0.2599, "step": 4668 }, { "epoch": 0.6246989563821247, "grad_norm": 43.71591567993164, "learning_rate": 3.753010436178753e-07, "loss": 0.4574, "step": 4669 }, { "epoch": 0.6248327535456248, "grad_norm": 49.65814208984375, "learning_rate": 3.7516724645437516e-07, "loss": 0.5684, "step": 4670 }, { "epoch": 0.624966550709125, "grad_norm": 36.12547302246094, "learning_rate": 3.75033449290875e-07, "loss": 0.3191, "step": 4671 }, { "epoch": 0.6251003478726251, "grad_norm": 24.714391708374023, "learning_rate": 3.748996521273749e-07, "loss": 0.378, "step": 4672 }, { "epoch": 0.6252341450361253, "grad_norm": 26.330642700195312, "learning_rate": 3.7476585496387475e-07, "loss": 0.3774, "step": 4673 }, { "epoch": 0.6253679421996253, "grad_norm": 20.747819900512695, "learning_rate": 3.746320578003746e-07, "loss": 0.3147, "step": 4674 }, { "epoch": 0.6255017393631255, "grad_norm": 22.37615394592285, "learning_rate": 3.744982606368745e-07, "loss": 0.1905, "step": 4675 }, { "epoch": 0.6256355365266256, "grad_norm": 31.6766357421875, "learning_rate": 3.743644634733744e-07, "loss": 0.4742, "step": 4676 }, { "epoch": 0.6257693336901258, "grad_norm": 21.447589874267578, "learning_rate": 3.742306663098742e-07, "loss": 0.3495, "step": 4677 }, { "epoch": 0.6259031308536259, "grad_norm": 49.64288330078125, "learning_rate": 3.7409686914637404e-07, "loss": 0.4465, "step": 4678 }, { "epoch": 0.6260369280171261, "grad_norm": 35.71611022949219, "learning_rate": 3.7396307198287397e-07, "loss": 0.5347, "step": 4679 }, { "epoch": 0.6261707251806262, "grad_norm": 30.182838439941406, "learning_rate": 3.738292748193738e-07, "loss": 0.4301, "step": 4680 }, { "epoch": 0.6263045223441263, "grad_norm": 53.1832275390625, "learning_rate": 3.736954776558737e-07, "loss": 0.6141, "step": 4681 }, { "epoch": 0.6264383195076264, "grad_norm": 31.00541877746582, "learning_rate": 3.735616804923735e-07, "loss": 0.5488, "step": 4682 }, { "epoch": 0.6265721166711266, "grad_norm": 28.490074157714844, "learning_rate": 3.7342788332887343e-07, "loss": 0.437, "step": 4683 }, { "epoch": 0.6267059138346267, "grad_norm": 43.66949462890625, "learning_rate": 3.7329408616537326e-07, "loss": 0.3176, "step": 4684 }, { "epoch": 0.6268397109981269, "grad_norm": 40.09960174560547, "learning_rate": 3.7316028900187314e-07, "loss": 0.4736, "step": 4685 }, { "epoch": 0.626973508161627, "grad_norm": 41.38792419433594, "learning_rate": 3.73026491838373e-07, "loss": 0.4957, "step": 4686 }, { "epoch": 0.6271073053251272, "grad_norm": 22.050752639770508, "learning_rate": 3.728926946748729e-07, "loss": 0.3059, "step": 4687 }, { "epoch": 0.6272411024886272, "grad_norm": 40.4247932434082, "learning_rate": 3.727588975113727e-07, "loss": 0.5366, "step": 4688 }, { "epoch": 0.6273748996521273, "grad_norm": 22.162494659423828, "learning_rate": 3.726251003478726e-07, "loss": 0.515, "step": 4689 }, { "epoch": 0.6275086968156275, "grad_norm": 42.16183853149414, "learning_rate": 3.724913031843725e-07, "loss": 0.4546, "step": 4690 }, { "epoch": 0.6276424939791276, "grad_norm": 35.00616455078125, "learning_rate": 3.7235750602087236e-07, "loss": 0.671, "step": 4691 }, { "epoch": 0.6277762911426278, "grad_norm": 18.109905242919922, "learning_rate": 3.722237088573722e-07, "loss": 0.2903, "step": 4692 }, { "epoch": 0.6279100883061279, "grad_norm": 36.816810607910156, "learning_rate": 3.7208991169387207e-07, "loss": 0.31, "step": 4693 }, { "epoch": 0.6280438854696281, "grad_norm": 33.460689544677734, "learning_rate": 3.7195611453037195e-07, "loss": 0.4997, "step": 4694 }, { "epoch": 0.6281776826331282, "grad_norm": 36.88679504394531, "learning_rate": 3.718223173668718e-07, "loss": 0.276, "step": 4695 }, { "epoch": 0.6283114797966283, "grad_norm": 24.761919021606445, "learning_rate": 3.7168852020337165e-07, "loss": 0.3298, "step": 4696 }, { "epoch": 0.6284452769601284, "grad_norm": 31.532136917114258, "learning_rate": 3.715547230398716e-07, "loss": 0.3929, "step": 4697 }, { "epoch": 0.6285790741236286, "grad_norm": 32.842891693115234, "learning_rate": 3.714209258763714e-07, "loss": 0.4121, "step": 4698 }, { "epoch": 0.6287128712871287, "grad_norm": 25.299291610717773, "learning_rate": 3.712871287128713e-07, "loss": 0.3862, "step": 4699 }, { "epoch": 0.6288466684506289, "grad_norm": 15.26400089263916, "learning_rate": 3.711533315493711e-07, "loss": 0.243, "step": 4700 }, { "epoch": 0.628980465614129, "grad_norm": 21.19873809814453, "learning_rate": 3.7101953438587105e-07, "loss": 0.3176, "step": 4701 }, { "epoch": 0.6291142627776292, "grad_norm": 31.004070281982422, "learning_rate": 3.708857372223709e-07, "loss": 0.5108, "step": 4702 }, { "epoch": 0.6292480599411292, "grad_norm": 20.39217185974121, "learning_rate": 3.7075194005887075e-07, "loss": 0.2958, "step": 4703 }, { "epoch": 0.6293818571046293, "grad_norm": 19.517635345458984, "learning_rate": 3.706181428953706e-07, "loss": 0.3168, "step": 4704 }, { "epoch": 0.6295156542681295, "grad_norm": 22.869468688964844, "learning_rate": 3.7048434573187046e-07, "loss": 0.5351, "step": 4705 }, { "epoch": 0.6296494514316296, "grad_norm": 33.476314544677734, "learning_rate": 3.7035054856837034e-07, "loss": 0.5026, "step": 4706 }, { "epoch": 0.6297832485951298, "grad_norm": 33.76482391357422, "learning_rate": 3.7021675140487016e-07, "loss": 0.4136, "step": 4707 }, { "epoch": 0.62991704575863, "grad_norm": 29.361257553100586, "learning_rate": 3.700829542413701e-07, "loss": 0.5045, "step": 4708 }, { "epoch": 0.6300508429221301, "grad_norm": 33.176822662353516, "learning_rate": 3.699491570778699e-07, "loss": 0.6563, "step": 4709 }, { "epoch": 0.6301846400856301, "grad_norm": 27.542043685913086, "learning_rate": 3.698153599143698e-07, "loss": 0.3661, "step": 4710 }, { "epoch": 0.6303184372491303, "grad_norm": 20.910436630249023, "learning_rate": 3.6968156275086963e-07, "loss": 0.2634, "step": 4711 }, { "epoch": 0.6304522344126304, "grad_norm": 36.951786041259766, "learning_rate": 3.6954776558736956e-07, "loss": 0.7652, "step": 4712 }, { "epoch": 0.6305860315761306, "grad_norm": 27.61446762084961, "learning_rate": 3.694139684238694e-07, "loss": 0.5469, "step": 4713 }, { "epoch": 0.6307198287396307, "grad_norm": 24.57452964782715, "learning_rate": 3.6928017126036927e-07, "loss": 0.2392, "step": 4714 }, { "epoch": 0.6308536259031309, "grad_norm": 26.910842895507812, "learning_rate": 3.691463740968691e-07, "loss": 0.3122, "step": 4715 }, { "epoch": 0.630987423066631, "grad_norm": 15.720381736755371, "learning_rate": 3.69012576933369e-07, "loss": 0.3392, "step": 4716 }, { "epoch": 0.6311212202301312, "grad_norm": 35.56986618041992, "learning_rate": 3.6887877976986885e-07, "loss": 0.3207, "step": 4717 }, { "epoch": 0.6312550173936312, "grad_norm": 54.2183837890625, "learning_rate": 3.6874498260636873e-07, "loss": 0.695, "step": 4718 }, { "epoch": 0.6313888145571314, "grad_norm": 19.502765655517578, "learning_rate": 3.686111854428686e-07, "loss": 0.3545, "step": 4719 }, { "epoch": 0.6315226117206315, "grad_norm": 25.181488037109375, "learning_rate": 3.684773882793685e-07, "loss": 0.4743, "step": 4720 }, { "epoch": 0.6316564088841317, "grad_norm": 33.70606231689453, "learning_rate": 3.683435911158683e-07, "loss": 0.6108, "step": 4721 }, { "epoch": 0.6317902060476318, "grad_norm": 21.99226951599121, "learning_rate": 3.682097939523682e-07, "loss": 0.3339, "step": 4722 }, { "epoch": 0.631924003211132, "grad_norm": 24.47477912902832, "learning_rate": 3.6807599678886807e-07, "loss": 0.3951, "step": 4723 }, { "epoch": 0.6320578003746321, "grad_norm": 30.215251922607422, "learning_rate": 3.6794219962536795e-07, "loss": 0.494, "step": 4724 }, { "epoch": 0.6321915975381321, "grad_norm": 21.154273986816406, "learning_rate": 3.678084024618678e-07, "loss": 0.3577, "step": 4725 }, { "epoch": 0.6323253947016323, "grad_norm": 40.42019271850586, "learning_rate": 3.6767460529836766e-07, "loss": 0.5667, "step": 4726 }, { "epoch": 0.6324591918651324, "grad_norm": 26.732715606689453, "learning_rate": 3.6754080813486754e-07, "loss": 0.3284, "step": 4727 }, { "epoch": 0.6325929890286326, "grad_norm": 42.954681396484375, "learning_rate": 3.674070109713674e-07, "loss": 0.3819, "step": 4728 }, { "epoch": 0.6327267861921327, "grad_norm": 34.233985900878906, "learning_rate": 3.6727321380786724e-07, "loss": 0.4183, "step": 4729 }, { "epoch": 0.6328605833556329, "grad_norm": 27.6165771484375, "learning_rate": 3.671394166443672e-07, "loss": 0.3335, "step": 4730 }, { "epoch": 0.632994380519133, "grad_norm": 14.093342781066895, "learning_rate": 3.67005619480867e-07, "loss": 0.2648, "step": 4731 }, { "epoch": 0.6331281776826331, "grad_norm": 20.51238250732422, "learning_rate": 3.6687182231736683e-07, "loss": 0.3841, "step": 4732 }, { "epoch": 0.6332619748461332, "grad_norm": 21.70491600036621, "learning_rate": 3.667380251538667e-07, "loss": 0.3486, "step": 4733 }, { "epoch": 0.6333957720096334, "grad_norm": 32.156890869140625, "learning_rate": 3.666042279903666e-07, "loss": 0.5112, "step": 4734 }, { "epoch": 0.6335295691731335, "grad_norm": 16.08141326904297, "learning_rate": 3.6647043082686647e-07, "loss": 0.2739, "step": 4735 }, { "epoch": 0.6336633663366337, "grad_norm": 16.983501434326172, "learning_rate": 3.663366336633663e-07, "loss": 0.2509, "step": 4736 }, { "epoch": 0.6337971635001338, "grad_norm": 24.278860092163086, "learning_rate": 3.6620283649986617e-07, "loss": 0.3077, "step": 4737 }, { "epoch": 0.633930960663634, "grad_norm": 24.855701446533203, "learning_rate": 3.6606903933636605e-07, "loss": 0.3243, "step": 4738 }, { "epoch": 0.6340647578271341, "grad_norm": 28.898723602294922, "learning_rate": 3.6593524217286593e-07, "loss": 0.5951, "step": 4739 }, { "epoch": 0.6341985549906342, "grad_norm": 35.55670166015625, "learning_rate": 3.6580144500936576e-07, "loss": 0.247, "step": 4740 }, { "epoch": 0.6343323521541343, "grad_norm": 71.52902221679688, "learning_rate": 3.656676478458657e-07, "loss": 0.4946, "step": 4741 }, { "epoch": 0.6344661493176345, "grad_norm": 22.289142608642578, "learning_rate": 3.655338506823655e-07, "loss": 0.3554, "step": 4742 }, { "epoch": 0.6345999464811346, "grad_norm": 40.26486587524414, "learning_rate": 3.654000535188654e-07, "loss": 0.4162, "step": 4743 }, { "epoch": 0.6347337436446348, "grad_norm": 22.26603126525879, "learning_rate": 3.652662563553652e-07, "loss": 0.2124, "step": 4744 }, { "epoch": 0.6348675408081349, "grad_norm": 17.247533798217773, "learning_rate": 3.6513245919186515e-07, "loss": 0.2486, "step": 4745 }, { "epoch": 0.635001337971635, "grad_norm": 28.501718521118164, "learning_rate": 3.64998662028365e-07, "loss": 0.3781, "step": 4746 }, { "epoch": 0.6351351351351351, "grad_norm": 33.73472595214844, "learning_rate": 3.6486486486486486e-07, "loss": 0.451, "step": 4747 }, { "epoch": 0.6352689322986352, "grad_norm": 32.960052490234375, "learning_rate": 3.647310677013647e-07, "loss": 0.3144, "step": 4748 }, { "epoch": 0.6354027294621354, "grad_norm": 50.58157730102539, "learning_rate": 3.645972705378646e-07, "loss": 0.516, "step": 4749 }, { "epoch": 0.6355365266256355, "grad_norm": 25.209396362304688, "learning_rate": 3.6446347337436444e-07, "loss": 0.3148, "step": 4750 }, { "epoch": 0.6356703237891357, "grad_norm": 39.89286804199219, "learning_rate": 3.643296762108643e-07, "loss": 0.5056, "step": 4751 }, { "epoch": 0.6358041209526358, "grad_norm": 30.00689697265625, "learning_rate": 3.6419587904736415e-07, "loss": 0.6133, "step": 4752 }, { "epoch": 0.635937918116136, "grad_norm": 28.187511444091797, "learning_rate": 3.640620818838641e-07, "loss": 0.4087, "step": 4753 }, { "epoch": 0.636071715279636, "grad_norm": 43.10702896118164, "learning_rate": 3.639282847203639e-07, "loss": 0.601, "step": 4754 }, { "epoch": 0.6362055124431362, "grad_norm": 65.36869049072266, "learning_rate": 3.637944875568638e-07, "loss": 0.6126, "step": 4755 }, { "epoch": 0.6363393096066363, "grad_norm": 24.66992950439453, "learning_rate": 3.6366069039336367e-07, "loss": 0.4225, "step": 4756 }, { "epoch": 0.6364731067701365, "grad_norm": 40.28263854980469, "learning_rate": 3.635268932298635e-07, "loss": 0.7161, "step": 4757 }, { "epoch": 0.6366069039336366, "grad_norm": 25.533437728881836, "learning_rate": 3.6339309606636337e-07, "loss": 0.4818, "step": 4758 }, { "epoch": 0.6367407010971368, "grad_norm": 33.66526794433594, "learning_rate": 3.632592989028632e-07, "loss": 0.3304, "step": 4759 }, { "epoch": 0.6368744982606369, "grad_norm": 25.740318298339844, "learning_rate": 3.6312550173936313e-07, "loss": 0.4973, "step": 4760 }, { "epoch": 0.6370082954241371, "grad_norm": 25.740564346313477, "learning_rate": 3.6299170457586296e-07, "loss": 0.2022, "step": 4761 }, { "epoch": 0.6371420925876371, "grad_norm": 19.02604103088379, "learning_rate": 3.6285790741236284e-07, "loss": 0.2535, "step": 4762 }, { "epoch": 0.6372758897511372, "grad_norm": 49.29140090942383, "learning_rate": 3.6272411024886266e-07, "loss": 0.4123, "step": 4763 }, { "epoch": 0.6374096869146374, "grad_norm": 29.960874557495117, "learning_rate": 3.625903130853626e-07, "loss": 0.5435, "step": 4764 }, { "epoch": 0.6375434840781375, "grad_norm": 25.485572814941406, "learning_rate": 3.624565159218624e-07, "loss": 0.376, "step": 4765 }, { "epoch": 0.6376772812416377, "grad_norm": 25.0344181060791, "learning_rate": 3.623227187583623e-07, "loss": 0.6533, "step": 4766 }, { "epoch": 0.6378110784051378, "grad_norm": 26.50189971923828, "learning_rate": 3.621889215948622e-07, "loss": 0.5302, "step": 4767 }, { "epoch": 0.637944875568638, "grad_norm": 30.400617599487305, "learning_rate": 3.6205512443136206e-07, "loss": 0.3955, "step": 4768 }, { "epoch": 0.638078672732138, "grad_norm": 69.35574340820312, "learning_rate": 3.619213272678619e-07, "loss": 0.3734, "step": 4769 }, { "epoch": 0.6382124698956382, "grad_norm": 33.99388885498047, "learning_rate": 3.6178753010436176e-07, "loss": 0.4746, "step": 4770 }, { "epoch": 0.6383462670591383, "grad_norm": 26.101253509521484, "learning_rate": 3.6165373294086164e-07, "loss": 0.4668, "step": 4771 }, { "epoch": 0.6384800642226385, "grad_norm": 22.2539119720459, "learning_rate": 3.615199357773615e-07, "loss": 0.4521, "step": 4772 }, { "epoch": 0.6386138613861386, "grad_norm": 39.47792053222656, "learning_rate": 3.6138613861386135e-07, "loss": 0.6166, "step": 4773 }, { "epoch": 0.6387476585496388, "grad_norm": 25.548603057861328, "learning_rate": 3.6125234145036123e-07, "loss": 0.3557, "step": 4774 }, { "epoch": 0.6388814557131389, "grad_norm": 14.746726989746094, "learning_rate": 3.611185442868611e-07, "loss": 0.2842, "step": 4775 }, { "epoch": 0.639015252876639, "grad_norm": 34.866092681884766, "learning_rate": 3.60984747123361e-07, "loss": 0.6245, "step": 4776 }, { "epoch": 0.6391490500401391, "grad_norm": 26.83434295654297, "learning_rate": 3.608509499598608e-07, "loss": 0.2288, "step": 4777 }, { "epoch": 0.6392828472036393, "grad_norm": 28.54389190673828, "learning_rate": 3.6071715279636075e-07, "loss": 0.6473, "step": 4778 }, { "epoch": 0.6394166443671394, "grad_norm": 20.31393051147461, "learning_rate": 3.6058335563286057e-07, "loss": 0.3173, "step": 4779 }, { "epoch": 0.6395504415306396, "grad_norm": 21.6796932220459, "learning_rate": 3.6044955846936045e-07, "loss": 0.3793, "step": 4780 }, { "epoch": 0.6396842386941397, "grad_norm": 22.953454971313477, "learning_rate": 3.603157613058603e-07, "loss": 0.3409, "step": 4781 }, { "epoch": 0.6398180358576399, "grad_norm": 25.633163452148438, "learning_rate": 3.601819641423602e-07, "loss": 0.4508, "step": 4782 }, { "epoch": 0.63995183302114, "grad_norm": 17.43596649169922, "learning_rate": 3.6004816697886004e-07, "loss": 0.4554, "step": 4783 }, { "epoch": 0.64008563018464, "grad_norm": 19.01993751525879, "learning_rate": 3.5991436981535986e-07, "loss": 0.4005, "step": 4784 }, { "epoch": 0.6402194273481402, "grad_norm": 22.90517807006836, "learning_rate": 3.5978057265185974e-07, "loss": 0.3361, "step": 4785 }, { "epoch": 0.6403532245116403, "grad_norm": 35.39620590209961, "learning_rate": 3.596467754883596e-07, "loss": 0.5074, "step": 4786 }, { "epoch": 0.6404870216751405, "grad_norm": 21.04572105407715, "learning_rate": 3.595129783248595e-07, "loss": 0.3066, "step": 4787 }, { "epoch": 0.6406208188386406, "grad_norm": 26.833080291748047, "learning_rate": 3.593791811613593e-07, "loss": 0.4092, "step": 4788 }, { "epoch": 0.6407546160021408, "grad_norm": 48.4040641784668, "learning_rate": 3.5924538399785926e-07, "loss": 0.4471, "step": 4789 }, { "epoch": 0.6408884131656409, "grad_norm": 17.188297271728516, "learning_rate": 3.591115868343591e-07, "loss": 0.3206, "step": 4790 }, { "epoch": 0.641022210329141, "grad_norm": 33.09953689575195, "learning_rate": 3.5897778967085896e-07, "loss": 0.4503, "step": 4791 }, { "epoch": 0.6411560074926411, "grad_norm": 23.860918045043945, "learning_rate": 3.588439925073588e-07, "loss": 0.3741, "step": 4792 }, { "epoch": 0.6412898046561413, "grad_norm": 38.089759826660156, "learning_rate": 3.587101953438587e-07, "loss": 0.4808, "step": 4793 }, { "epoch": 0.6414236018196414, "grad_norm": 15.911900520324707, "learning_rate": 3.5857639818035855e-07, "loss": 0.37, "step": 4794 }, { "epoch": 0.6415573989831416, "grad_norm": 28.440736770629883, "learning_rate": 3.5844260101685843e-07, "loss": 0.4187, "step": 4795 }, { "epoch": 0.6416911961466417, "grad_norm": 41.294559478759766, "learning_rate": 3.5830880385335825e-07, "loss": 0.4609, "step": 4796 }, { "epoch": 0.6418249933101419, "grad_norm": 22.961923599243164, "learning_rate": 3.581750066898582e-07, "loss": 0.2213, "step": 4797 }, { "epoch": 0.6419587904736419, "grad_norm": 21.515390396118164, "learning_rate": 3.58041209526358e-07, "loss": 0.4022, "step": 4798 }, { "epoch": 0.642092587637142, "grad_norm": 18.02958106994629, "learning_rate": 3.579074123628579e-07, "loss": 0.4069, "step": 4799 }, { "epoch": 0.6422263848006422, "grad_norm": 25.05667495727539, "learning_rate": 3.5777361519935777e-07, "loss": 0.4363, "step": 4800 }, { "epoch": 0.6423601819641424, "grad_norm": 20.381189346313477, "learning_rate": 3.5763981803585765e-07, "loss": 0.3392, "step": 4801 }, { "epoch": 0.6424939791276425, "grad_norm": 29.670711517333984, "learning_rate": 3.575060208723575e-07, "loss": 0.3189, "step": 4802 }, { "epoch": 0.6426277762911426, "grad_norm": 25.383813858032227, "learning_rate": 3.5737222370885736e-07, "loss": 0.3572, "step": 4803 }, { "epoch": 0.6427615734546428, "grad_norm": 37.44811248779297, "learning_rate": 3.5723842654535724e-07, "loss": 0.6419, "step": 4804 }, { "epoch": 0.642895370618143, "grad_norm": 17.547945022583008, "learning_rate": 3.571046293818571e-07, "loss": 0.2763, "step": 4805 }, { "epoch": 0.643029167781643, "grad_norm": 15.776501655578613, "learning_rate": 3.5697083221835694e-07, "loss": 0.3226, "step": 4806 }, { "epoch": 0.6431629649451431, "grad_norm": 49.96028518676758, "learning_rate": 3.568370350548568e-07, "loss": 0.7081, "step": 4807 }, { "epoch": 0.6432967621086433, "grad_norm": 29.441194534301758, "learning_rate": 3.567032378913567e-07, "loss": 0.4721, "step": 4808 }, { "epoch": 0.6434305592721434, "grad_norm": 22.125476837158203, "learning_rate": 3.565694407278565e-07, "loss": 0.3962, "step": 4809 }, { "epoch": 0.6435643564356436, "grad_norm": 18.771390914916992, "learning_rate": 3.564356435643564e-07, "loss": 0.285, "step": 4810 }, { "epoch": 0.6436981535991437, "grad_norm": 37.226688385009766, "learning_rate": 3.563018464008563e-07, "loss": 0.5742, "step": 4811 }, { "epoch": 0.6438319507626439, "grad_norm": 27.125436782836914, "learning_rate": 3.5616804923735616e-07, "loss": 0.3842, "step": 4812 }, { "epoch": 0.6439657479261439, "grad_norm": 14.675829887390137, "learning_rate": 3.56034252073856e-07, "loss": 0.2192, "step": 4813 }, { "epoch": 0.6440995450896441, "grad_norm": 25.99903678894043, "learning_rate": 3.5590045491035587e-07, "loss": 0.3448, "step": 4814 }, { "epoch": 0.6442333422531442, "grad_norm": 26.876941680908203, "learning_rate": 3.5576665774685575e-07, "loss": 0.3213, "step": 4815 }, { "epoch": 0.6443671394166444, "grad_norm": 21.06963348388672, "learning_rate": 3.5563286058335563e-07, "loss": 0.3531, "step": 4816 }, { "epoch": 0.6445009365801445, "grad_norm": 27.800689697265625, "learning_rate": 3.5549906341985545e-07, "loss": 0.363, "step": 4817 }, { "epoch": 0.6446347337436447, "grad_norm": 24.6253719329834, "learning_rate": 3.5536526625635533e-07, "loss": 0.5109, "step": 4818 }, { "epoch": 0.6447685309071448, "grad_norm": 40.76906204223633, "learning_rate": 3.552314690928552e-07, "loss": 0.3082, "step": 4819 }, { "epoch": 0.6449023280706448, "grad_norm": 26.016910552978516, "learning_rate": 3.550976719293551e-07, "loss": 0.3338, "step": 4820 }, { "epoch": 0.645036125234145, "grad_norm": 26.433513641357422, "learning_rate": 3.549638747658549e-07, "loss": 0.4968, "step": 4821 }, { "epoch": 0.6451699223976451, "grad_norm": 21.28926658630371, "learning_rate": 3.5483007760235485e-07, "loss": 0.503, "step": 4822 }, { "epoch": 0.6453037195611453, "grad_norm": 29.003812789916992, "learning_rate": 3.546962804388547e-07, "loss": 0.4313, "step": 4823 }, { "epoch": 0.6454375167246454, "grad_norm": 25.476537704467773, "learning_rate": 3.5456248327535456e-07, "loss": 0.5235, "step": 4824 }, { "epoch": 0.6455713138881456, "grad_norm": 21.689254760742188, "learning_rate": 3.544286861118544e-07, "loss": 0.4671, "step": 4825 }, { "epoch": 0.6457051110516457, "grad_norm": 70.73736572265625, "learning_rate": 3.542948889483543e-07, "loss": 0.84, "step": 4826 }, { "epoch": 0.6458389082151459, "grad_norm": 28.365663528442383, "learning_rate": 3.5416109178485414e-07, "loss": 0.5437, "step": 4827 }, { "epoch": 0.6459727053786459, "grad_norm": 23.97479820251465, "learning_rate": 3.54027294621354e-07, "loss": 0.3617, "step": 4828 }, { "epoch": 0.6461065025421461, "grad_norm": 22.287517547607422, "learning_rate": 3.5389349745785385e-07, "loss": 0.3928, "step": 4829 }, { "epoch": 0.6462402997056462, "grad_norm": 18.756141662597656, "learning_rate": 3.537597002943538e-07, "loss": 0.4509, "step": 4830 }, { "epoch": 0.6463740968691464, "grad_norm": 29.214679718017578, "learning_rate": 3.536259031308536e-07, "loss": 0.302, "step": 4831 }, { "epoch": 0.6465078940326465, "grad_norm": 40.23711013793945, "learning_rate": 3.534921059673535e-07, "loss": 0.8077, "step": 4832 }, { "epoch": 0.6466416911961467, "grad_norm": 21.77651596069336, "learning_rate": 3.5335830880385336e-07, "loss": 0.4007, "step": 4833 }, { "epoch": 0.6467754883596468, "grad_norm": 19.523164749145508, "learning_rate": 3.5322451164035324e-07, "loss": 0.3663, "step": 4834 }, { "epoch": 0.6469092855231469, "grad_norm": 25.069561004638672, "learning_rate": 3.5309071447685307e-07, "loss": 0.3437, "step": 4835 }, { "epoch": 0.647043082686647, "grad_norm": 26.027456283569336, "learning_rate": 3.529569173133529e-07, "loss": 0.4858, "step": 4836 }, { "epoch": 0.6471768798501472, "grad_norm": 18.166500091552734, "learning_rate": 3.5282312014985283e-07, "loss": 0.3827, "step": 4837 }, { "epoch": 0.6473106770136473, "grad_norm": 20.97589683532715, "learning_rate": 3.5268932298635265e-07, "loss": 0.4151, "step": 4838 }, { "epoch": 0.6474444741771475, "grad_norm": 24.858760833740234, "learning_rate": 3.5255552582285253e-07, "loss": 0.429, "step": 4839 }, { "epoch": 0.6475782713406476, "grad_norm": 58.983421325683594, "learning_rate": 3.5242172865935236e-07, "loss": 0.6329, "step": 4840 }, { "epoch": 0.6477120685041478, "grad_norm": 25.06791114807129, "learning_rate": 3.522879314958523e-07, "loss": 0.4479, "step": 4841 }, { "epoch": 0.6478458656676478, "grad_norm": 32.773048400878906, "learning_rate": 3.521541343323521e-07, "loss": 0.4441, "step": 4842 }, { "epoch": 0.6479796628311479, "grad_norm": 44.85996627807617, "learning_rate": 3.52020337168852e-07, "loss": 0.4534, "step": 4843 }, { "epoch": 0.6481134599946481, "grad_norm": 27.228670120239258, "learning_rate": 3.518865400053519e-07, "loss": 0.3725, "step": 4844 }, { "epoch": 0.6482472571581482, "grad_norm": 18.45728874206543, "learning_rate": 3.5175274284185176e-07, "loss": 0.3182, "step": 4845 }, { "epoch": 0.6483810543216484, "grad_norm": 24.72555160522461, "learning_rate": 3.516189456783516e-07, "loss": 0.4969, "step": 4846 }, { "epoch": 0.6485148514851485, "grad_norm": 23.088762283325195, "learning_rate": 3.5148514851485146e-07, "loss": 0.2903, "step": 4847 }, { "epoch": 0.6486486486486487, "grad_norm": 22.75870132446289, "learning_rate": 3.5135135135135134e-07, "loss": 0.2428, "step": 4848 }, { "epoch": 0.6487824458121488, "grad_norm": 35.030887603759766, "learning_rate": 3.512175541878512e-07, "loss": 0.5892, "step": 4849 }, { "epoch": 0.6489162429756489, "grad_norm": 28.767423629760742, "learning_rate": 3.5108375702435105e-07, "loss": 0.4602, "step": 4850 }, { "epoch": 0.649050040139149, "grad_norm": 46.79442596435547, "learning_rate": 3.509499598608509e-07, "loss": 0.6008, "step": 4851 }, { "epoch": 0.6491838373026492, "grad_norm": 50.64055633544922, "learning_rate": 3.508161626973508e-07, "loss": 0.5721, "step": 4852 }, { "epoch": 0.6493176344661493, "grad_norm": 26.438861846923828, "learning_rate": 3.506823655338507e-07, "loss": 0.5101, "step": 4853 }, { "epoch": 0.6494514316296495, "grad_norm": 23.896297454833984, "learning_rate": 3.505485683703505e-07, "loss": 0.3866, "step": 4854 }, { "epoch": 0.6495852287931496, "grad_norm": 24.062456130981445, "learning_rate": 3.5041477120685044e-07, "loss": 0.3309, "step": 4855 }, { "epoch": 0.6497190259566498, "grad_norm": 31.059673309326172, "learning_rate": 3.5028097404335027e-07, "loss": 0.4529, "step": 4856 }, { "epoch": 0.6498528231201498, "grad_norm": 31.7482852935791, "learning_rate": 3.5014717687985015e-07, "loss": 0.4743, "step": 4857 }, { "epoch": 0.64998662028365, "grad_norm": 26.944263458251953, "learning_rate": 3.5001337971635e-07, "loss": 0.3961, "step": 4858 }, { "epoch": 0.6501204174471501, "grad_norm": 20.04827880859375, "learning_rate": 3.498795825528499e-07, "loss": 0.3729, "step": 4859 }, { "epoch": 0.6502542146106502, "grad_norm": 21.03137969970703, "learning_rate": 3.4974578538934973e-07, "loss": 0.3925, "step": 4860 }, { "epoch": 0.6503880117741504, "grad_norm": 26.189355850219727, "learning_rate": 3.4961198822584956e-07, "loss": 0.5009, "step": 4861 }, { "epoch": 0.6505218089376505, "grad_norm": 30.898073196411133, "learning_rate": 3.4947819106234944e-07, "loss": 0.4191, "step": 4862 }, { "epoch": 0.6506556061011507, "grad_norm": 29.67040252685547, "learning_rate": 3.493443938988493e-07, "loss": 0.6095, "step": 4863 }, { "epoch": 0.6507894032646507, "grad_norm": 23.35542106628418, "learning_rate": 3.492105967353492e-07, "loss": 0.4775, "step": 4864 }, { "epoch": 0.6509232004281509, "grad_norm": 25.703067779541016, "learning_rate": 3.49076799571849e-07, "loss": 0.4579, "step": 4865 }, { "epoch": 0.651056997591651, "grad_norm": 18.326404571533203, "learning_rate": 3.4894300240834896e-07, "loss": 0.3318, "step": 4866 }, { "epoch": 0.6511907947551512, "grad_norm": 23.445289611816406, "learning_rate": 3.488092052448488e-07, "loss": 0.4093, "step": 4867 }, { "epoch": 0.6513245919186513, "grad_norm": 25.634370803833008, "learning_rate": 3.4867540808134866e-07, "loss": 0.4176, "step": 4868 }, { "epoch": 0.6514583890821515, "grad_norm": 22.560632705688477, "learning_rate": 3.485416109178485e-07, "loss": 0.3853, "step": 4869 }, { "epoch": 0.6515921862456516, "grad_norm": 27.377668380737305, "learning_rate": 3.484078137543484e-07, "loss": 0.5141, "step": 4870 }, { "epoch": 0.6517259834091518, "grad_norm": 33.0823860168457, "learning_rate": 3.4827401659084825e-07, "loss": 0.35, "step": 4871 }, { "epoch": 0.6518597805726518, "grad_norm": 28.319602966308594, "learning_rate": 3.481402194273481e-07, "loss": 0.5519, "step": 4872 }, { "epoch": 0.651993577736152, "grad_norm": 35.46113967895508, "learning_rate": 3.4800642226384795e-07, "loss": 0.3902, "step": 4873 }, { "epoch": 0.6521273748996521, "grad_norm": 20.327985763549805, "learning_rate": 3.478726251003479e-07, "loss": 0.4625, "step": 4874 }, { "epoch": 0.6522611720631523, "grad_norm": 28.556093215942383, "learning_rate": 3.477388279368477e-07, "loss": 0.4128, "step": 4875 }, { "epoch": 0.6523949692266524, "grad_norm": 20.72854232788086, "learning_rate": 3.476050307733476e-07, "loss": 0.424, "step": 4876 }, { "epoch": 0.6525287663901526, "grad_norm": 20.006689071655273, "learning_rate": 3.4747123360984747e-07, "loss": 0.2377, "step": 4877 }, { "epoch": 0.6526625635536527, "grad_norm": 45.941043853759766, "learning_rate": 3.4733743644634735e-07, "loss": 0.3052, "step": 4878 }, { "epoch": 0.6527963607171527, "grad_norm": 22.506277084350586, "learning_rate": 3.472036392828472e-07, "loss": 0.2936, "step": 4879 }, { "epoch": 0.6529301578806529, "grad_norm": 12.967500686645508, "learning_rate": 3.4706984211934705e-07, "loss": 0.2486, "step": 4880 }, { "epoch": 0.653063955044153, "grad_norm": 41.5772590637207, "learning_rate": 3.4693604495584693e-07, "loss": 0.4452, "step": 4881 }, { "epoch": 0.6531977522076532, "grad_norm": 23.407669067382812, "learning_rate": 3.468022477923468e-07, "loss": 0.2815, "step": 4882 }, { "epoch": 0.6533315493711533, "grad_norm": 23.823585510253906, "learning_rate": 3.4666845062884664e-07, "loss": 0.2611, "step": 4883 }, { "epoch": 0.6534653465346535, "grad_norm": 25.225942611694336, "learning_rate": 3.465346534653465e-07, "loss": 0.5362, "step": 4884 }, { "epoch": 0.6535991436981536, "grad_norm": 27.4511775970459, "learning_rate": 3.464008563018464e-07, "loss": 0.3519, "step": 4885 }, { "epoch": 0.6537329408616537, "grad_norm": 30.68878936767578, "learning_rate": 3.462670591383463e-07, "loss": 0.5086, "step": 4886 }, { "epoch": 0.6538667380251538, "grad_norm": 27.558786392211914, "learning_rate": 3.461332619748461e-07, "loss": 0.326, "step": 4887 }, { "epoch": 0.654000535188654, "grad_norm": 26.8610897064209, "learning_rate": 3.4599946481134604e-07, "loss": 0.3195, "step": 4888 }, { "epoch": 0.6541343323521541, "grad_norm": 52.95899963378906, "learning_rate": 3.4586566764784586e-07, "loss": 0.7159, "step": 4889 }, { "epoch": 0.6542681295156543, "grad_norm": 22.85002899169922, "learning_rate": 3.457318704843457e-07, "loss": 0.3244, "step": 4890 }, { "epoch": 0.6544019266791544, "grad_norm": 23.415529251098633, "learning_rate": 3.4559807332084557e-07, "loss": 0.3793, "step": 4891 }, { "epoch": 0.6545357238426546, "grad_norm": 21.073688507080078, "learning_rate": 3.4546427615734545e-07, "loss": 0.3136, "step": 4892 }, { "epoch": 0.6546695210061547, "grad_norm": 36.10994338989258, "learning_rate": 3.453304789938453e-07, "loss": 0.3837, "step": 4893 }, { "epoch": 0.6548033181696548, "grad_norm": 16.695390701293945, "learning_rate": 3.4519668183034515e-07, "loss": 0.182, "step": 4894 }, { "epoch": 0.6549371153331549, "grad_norm": 23.74831199645996, "learning_rate": 3.4506288466684503e-07, "loss": 0.1131, "step": 4895 }, { "epoch": 0.6550709124966551, "grad_norm": 25.61917495727539, "learning_rate": 3.449290875033449e-07, "loss": 0.3237, "step": 4896 }, { "epoch": 0.6552047096601552, "grad_norm": 26.176502227783203, "learning_rate": 3.447952903398448e-07, "loss": 0.3599, "step": 4897 }, { "epoch": 0.6553385068236554, "grad_norm": 14.090481758117676, "learning_rate": 3.446614931763446e-07, "loss": 0.1899, "step": 4898 }, { "epoch": 0.6554723039871555, "grad_norm": 48.9961051940918, "learning_rate": 3.4452769601284455e-07, "loss": 0.5025, "step": 4899 }, { "epoch": 0.6556061011506557, "grad_norm": 30.460607528686523, "learning_rate": 3.443938988493444e-07, "loss": 0.4203, "step": 4900 }, { "epoch": 0.6557398983141557, "grad_norm": 35.380287170410156, "learning_rate": 3.4426010168584425e-07, "loss": 0.5601, "step": 4901 }, { "epoch": 0.6558736954776558, "grad_norm": 30.09360694885254, "learning_rate": 3.441263045223441e-07, "loss": 0.3926, "step": 4902 }, { "epoch": 0.656007492641156, "grad_norm": 22.45304298400879, "learning_rate": 3.43992507358844e-07, "loss": 0.3438, "step": 4903 }, { "epoch": 0.6561412898046561, "grad_norm": 37.12942123413086, "learning_rate": 3.4385871019534384e-07, "loss": 0.595, "step": 4904 }, { "epoch": 0.6562750869681563, "grad_norm": 47.67929458618164, "learning_rate": 3.437249130318437e-07, "loss": 0.2893, "step": 4905 }, { "epoch": 0.6564088841316564, "grad_norm": 30.555776596069336, "learning_rate": 3.4359111586834354e-07, "loss": 0.4979, "step": 4906 }, { "epoch": 0.6565426812951566, "grad_norm": 34.45344161987305, "learning_rate": 3.434573187048435e-07, "loss": 0.3419, "step": 4907 }, { "epoch": 0.6566764784586567, "grad_norm": 34.40779495239258, "learning_rate": 3.433235215413433e-07, "loss": 0.3355, "step": 4908 }, { "epoch": 0.6568102756221568, "grad_norm": 36.08001708984375, "learning_rate": 3.431897243778432e-07, "loss": 0.413, "step": 4909 }, { "epoch": 0.6569440727856569, "grad_norm": 25.212051391601562, "learning_rate": 3.4305592721434306e-07, "loss": 0.593, "step": 4910 }, { "epoch": 0.6570778699491571, "grad_norm": 37.067283630371094, "learning_rate": 3.4292213005084294e-07, "loss": 0.4309, "step": 4911 }, { "epoch": 0.6572116671126572, "grad_norm": 27.62531280517578, "learning_rate": 3.4278833288734277e-07, "loss": 0.5202, "step": 4912 }, { "epoch": 0.6573454642761574, "grad_norm": 45.27996826171875, "learning_rate": 3.426545357238426e-07, "loss": 0.4949, "step": 4913 }, { "epoch": 0.6574792614396575, "grad_norm": 33.709407806396484, "learning_rate": 3.425207385603425e-07, "loss": 0.5229, "step": 4914 }, { "epoch": 0.6576130586031577, "grad_norm": 45.073177337646484, "learning_rate": 3.4238694139684235e-07, "loss": 0.5265, "step": 4915 }, { "epoch": 0.6577468557666577, "grad_norm": 43.64764404296875, "learning_rate": 3.4225314423334223e-07, "loss": 0.5996, "step": 4916 }, { "epoch": 0.6578806529301578, "grad_norm": 39.8863410949707, "learning_rate": 3.4211934706984206e-07, "loss": 0.3946, "step": 4917 }, { "epoch": 0.658014450093658, "grad_norm": 40.88804244995117, "learning_rate": 3.41985549906342e-07, "loss": 0.5625, "step": 4918 }, { "epoch": 0.6581482472571581, "grad_norm": 36.793434143066406, "learning_rate": 3.418517527428418e-07, "loss": 0.3192, "step": 4919 }, { "epoch": 0.6582820444206583, "grad_norm": 20.23357391357422, "learning_rate": 3.417179555793417e-07, "loss": 0.2936, "step": 4920 }, { "epoch": 0.6584158415841584, "grad_norm": 35.27725601196289, "learning_rate": 3.415841584158416e-07, "loss": 0.5154, "step": 4921 }, { "epoch": 0.6585496387476586, "grad_norm": 25.401592254638672, "learning_rate": 3.4145036125234145e-07, "loss": 0.4322, "step": 4922 }, { "epoch": 0.6586834359111586, "grad_norm": 24.926565170288086, "learning_rate": 3.413165640888413e-07, "loss": 0.3063, "step": 4923 }, { "epoch": 0.6588172330746588, "grad_norm": 25.357742309570312, "learning_rate": 3.4118276692534116e-07, "loss": 0.4154, "step": 4924 }, { "epoch": 0.6589510302381589, "grad_norm": 38.67372131347656, "learning_rate": 3.4104896976184104e-07, "loss": 0.5389, "step": 4925 }, { "epoch": 0.6590848274016591, "grad_norm": 35.488563537597656, "learning_rate": 3.409151725983409e-07, "loss": 0.5794, "step": 4926 }, { "epoch": 0.6592186245651592, "grad_norm": 35.48331832885742, "learning_rate": 3.4078137543484074e-07, "loss": 0.3241, "step": 4927 }, { "epoch": 0.6593524217286594, "grad_norm": 29.792387008666992, "learning_rate": 3.406475782713406e-07, "loss": 0.3996, "step": 4928 }, { "epoch": 0.6594862188921595, "grad_norm": 37.69997787475586, "learning_rate": 3.405137811078405e-07, "loss": 0.45, "step": 4929 }, { "epoch": 0.6596200160556597, "grad_norm": 28.80095863342285, "learning_rate": 3.403799839443404e-07, "loss": 0.2766, "step": 4930 }, { "epoch": 0.6597538132191597, "grad_norm": 27.429214477539062, "learning_rate": 3.402461867808402e-07, "loss": 0.362, "step": 4931 }, { "epoch": 0.6598876103826599, "grad_norm": 14.884490013122559, "learning_rate": 3.4011238961734014e-07, "loss": 0.3101, "step": 4932 }, { "epoch": 0.66002140754616, "grad_norm": 22.503400802612305, "learning_rate": 3.3997859245383997e-07, "loss": 0.3905, "step": 4933 }, { "epoch": 0.6601552047096602, "grad_norm": 25.00767707824707, "learning_rate": 3.3984479529033985e-07, "loss": 0.4706, "step": 4934 }, { "epoch": 0.6602890018731603, "grad_norm": 37.24898147583008, "learning_rate": 3.3971099812683967e-07, "loss": 0.5774, "step": 4935 }, { "epoch": 0.6604227990366605, "grad_norm": 41.509613037109375, "learning_rate": 3.395772009633396e-07, "loss": 0.4513, "step": 4936 }, { "epoch": 0.6605565962001606, "grad_norm": 30.059236526489258, "learning_rate": 3.3944340379983943e-07, "loss": 0.3558, "step": 4937 }, { "epoch": 0.6606903933636606, "grad_norm": 34.49867248535156, "learning_rate": 3.393096066363393e-07, "loss": 0.5743, "step": 4938 }, { "epoch": 0.6608241905271608, "grad_norm": 19.959367752075195, "learning_rate": 3.3917580947283914e-07, "loss": 0.3382, "step": 4939 }, { "epoch": 0.6609579876906609, "grad_norm": 22.694143295288086, "learning_rate": 3.39042012309339e-07, "loss": 0.3458, "step": 4940 }, { "epoch": 0.6610917848541611, "grad_norm": 27.83702850341797, "learning_rate": 3.389082151458389e-07, "loss": 0.2893, "step": 4941 }, { "epoch": 0.6612255820176612, "grad_norm": 67.45203399658203, "learning_rate": 3.387744179823387e-07, "loss": 0.7863, "step": 4942 }, { "epoch": 0.6613593791811614, "grad_norm": 27.738088607788086, "learning_rate": 3.3864062081883865e-07, "loss": 0.4583, "step": 4943 }, { "epoch": 0.6614931763446615, "grad_norm": 20.140777587890625, "learning_rate": 3.385068236553385e-07, "loss": 0.2846, "step": 4944 }, { "epoch": 0.6616269735081616, "grad_norm": 52.19807815551758, "learning_rate": 3.3837302649183836e-07, "loss": 0.5649, "step": 4945 }, { "epoch": 0.6617607706716617, "grad_norm": 38.07120132446289, "learning_rate": 3.382392293283382e-07, "loss": 0.461, "step": 4946 }, { "epoch": 0.6618945678351619, "grad_norm": 31.120281219482422, "learning_rate": 3.381054321648381e-07, "loss": 0.4685, "step": 4947 }, { "epoch": 0.662028364998662, "grad_norm": 27.31859588623047, "learning_rate": 3.3797163500133794e-07, "loss": 0.3305, "step": 4948 }, { "epoch": 0.6621621621621622, "grad_norm": 18.92226219177246, "learning_rate": 3.378378378378378e-07, "loss": 0.3652, "step": 4949 }, { "epoch": 0.6622959593256623, "grad_norm": 17.19691276550293, "learning_rate": 3.3770404067433765e-07, "loss": 0.3162, "step": 4950 }, { "epoch": 0.6624297564891625, "grad_norm": 29.29511260986328, "learning_rate": 3.375702435108376e-07, "loss": 0.4374, "step": 4951 }, { "epoch": 0.6625635536526626, "grad_norm": 24.99521255493164, "learning_rate": 3.374364463473374e-07, "loss": 0.2418, "step": 4952 }, { "epoch": 0.6626973508161627, "grad_norm": 24.780303955078125, "learning_rate": 3.373026491838373e-07, "loss": 0.5575, "step": 4953 }, { "epoch": 0.6628311479796628, "grad_norm": 21.050682067871094, "learning_rate": 3.3716885202033717e-07, "loss": 0.3388, "step": 4954 }, { "epoch": 0.662964945143163, "grad_norm": 25.058975219726562, "learning_rate": 3.3703505485683705e-07, "loss": 0.4375, "step": 4955 }, { "epoch": 0.6630987423066631, "grad_norm": 23.588537216186523, "learning_rate": 3.3690125769333687e-07, "loss": 0.3389, "step": 4956 }, { "epoch": 0.6632325394701633, "grad_norm": 18.286949157714844, "learning_rate": 3.3676746052983675e-07, "loss": 0.4224, "step": 4957 }, { "epoch": 0.6633663366336634, "grad_norm": 31.56613540649414, "learning_rate": 3.3663366336633663e-07, "loss": 0.413, "step": 4958 }, { "epoch": 0.6635001337971635, "grad_norm": 21.12949562072754, "learning_rate": 3.364998662028365e-07, "loss": 0.3809, "step": 4959 }, { "epoch": 0.6636339309606636, "grad_norm": 30.60123634338379, "learning_rate": 3.3636606903933634e-07, "loss": 0.3503, "step": 4960 }, { "epoch": 0.6637677281241637, "grad_norm": 24.41118812561035, "learning_rate": 3.362322718758362e-07, "loss": 0.4749, "step": 4961 }, { "epoch": 0.6639015252876639, "grad_norm": 29.319520950317383, "learning_rate": 3.360984747123361e-07, "loss": 0.5132, "step": 4962 }, { "epoch": 0.664035322451164, "grad_norm": 23.435903549194336, "learning_rate": 3.35964677548836e-07, "loss": 0.319, "step": 4963 }, { "epoch": 0.6641691196146642, "grad_norm": 39.602970123291016, "learning_rate": 3.358308803853358e-07, "loss": 0.416, "step": 4964 }, { "epoch": 0.6643029167781643, "grad_norm": 38.80707550048828, "learning_rate": 3.3569708322183573e-07, "loss": 0.6112, "step": 4965 }, { "epoch": 0.6644367139416645, "grad_norm": 26.232566833496094, "learning_rate": 3.3556328605833556e-07, "loss": 0.2969, "step": 4966 }, { "epoch": 0.6645705111051645, "grad_norm": 36.082611083984375, "learning_rate": 3.354294888948354e-07, "loss": 0.2979, "step": 4967 }, { "epoch": 0.6647043082686647, "grad_norm": 42.94127655029297, "learning_rate": 3.3529569173133527e-07, "loss": 0.288, "step": 4968 }, { "epoch": 0.6648381054321648, "grad_norm": 38.46642303466797, "learning_rate": 3.3516189456783514e-07, "loss": 0.4285, "step": 4969 }, { "epoch": 0.664971902595665, "grad_norm": 26.939531326293945, "learning_rate": 3.35028097404335e-07, "loss": 0.487, "step": 4970 }, { "epoch": 0.6651056997591651, "grad_norm": 59.20045471191406, "learning_rate": 3.3489430024083485e-07, "loss": 0.3808, "step": 4971 }, { "epoch": 0.6652394969226653, "grad_norm": 23.130542755126953, "learning_rate": 3.3476050307733473e-07, "loss": 0.3397, "step": 4972 }, { "epoch": 0.6653732940861654, "grad_norm": 20.17921257019043, "learning_rate": 3.346267059138346e-07, "loss": 0.3615, "step": 4973 }, { "epoch": 0.6655070912496656, "grad_norm": 28.55038070678711, "learning_rate": 3.344929087503345e-07, "loss": 0.4902, "step": 4974 }, { "epoch": 0.6656408884131656, "grad_norm": 17.4390926361084, "learning_rate": 3.343591115868343e-07, "loss": 0.3351, "step": 4975 }, { "epoch": 0.6657746855766657, "grad_norm": 38.37306594848633, "learning_rate": 3.3422531442333425e-07, "loss": 0.4474, "step": 4976 }, { "epoch": 0.6659084827401659, "grad_norm": 22.188209533691406, "learning_rate": 3.3409151725983407e-07, "loss": 0.3661, "step": 4977 }, { "epoch": 0.666042279903666, "grad_norm": 30.90142059326172, "learning_rate": 3.3395772009633395e-07, "loss": 0.5135, "step": 4978 }, { "epoch": 0.6661760770671662, "grad_norm": 27.34392547607422, "learning_rate": 3.338239229328338e-07, "loss": 0.616, "step": 4979 }, { "epoch": 0.6663098742306663, "grad_norm": 19.83935546875, "learning_rate": 3.336901257693337e-07, "loss": 0.1783, "step": 4980 }, { "epoch": 0.6664436713941665, "grad_norm": 31.403291702270508, "learning_rate": 3.3355632860583354e-07, "loss": 0.3821, "step": 4981 }, { "epoch": 0.6665774685576665, "grad_norm": 22.57485580444336, "learning_rate": 3.334225314423334e-07, "loss": 0.4269, "step": 4982 }, { "epoch": 0.6667112657211667, "grad_norm": 27.314268112182617, "learning_rate": 3.3328873427883324e-07, "loss": 0.3184, "step": 4983 }, { "epoch": 0.6668450628846668, "grad_norm": 35.06191635131836, "learning_rate": 3.331549371153332e-07, "loss": 0.511, "step": 4984 }, { "epoch": 0.666978860048167, "grad_norm": 25.757654190063477, "learning_rate": 3.33021139951833e-07, "loss": 0.3258, "step": 4985 }, { "epoch": 0.6671126572116671, "grad_norm": 32.41953659057617, "learning_rate": 3.328873427883329e-07, "loss": 0.3887, "step": 4986 }, { "epoch": 0.6672464543751673, "grad_norm": 31.231616973876953, "learning_rate": 3.3275354562483276e-07, "loss": 0.4851, "step": 4987 }, { "epoch": 0.6673802515386674, "grad_norm": 27.890548706054688, "learning_rate": 3.3261974846133264e-07, "loss": 0.4863, "step": 4988 }, { "epoch": 0.6675140487021675, "grad_norm": 22.203495025634766, "learning_rate": 3.3248595129783247e-07, "loss": 0.5975, "step": 4989 }, { "epoch": 0.6676478458656676, "grad_norm": 19.32182502746582, "learning_rate": 3.3235215413433234e-07, "loss": 0.4236, "step": 4990 }, { "epoch": 0.6677816430291678, "grad_norm": 27.46799659729004, "learning_rate": 3.322183569708322e-07, "loss": 0.6191, "step": 4991 }, { "epoch": 0.6679154401926679, "grad_norm": 24.642635345458984, "learning_rate": 3.3208455980733205e-07, "loss": 0.3341, "step": 4992 }, { "epoch": 0.6680492373561681, "grad_norm": 29.538394927978516, "learning_rate": 3.3195076264383193e-07, "loss": 0.4645, "step": 4993 }, { "epoch": 0.6681830345196682, "grad_norm": 35.50986862182617, "learning_rate": 3.3181696548033176e-07, "loss": 0.5213, "step": 4994 }, { "epoch": 0.6683168316831684, "grad_norm": 37.05827713012695, "learning_rate": 3.316831683168317e-07, "loss": 0.443, "step": 4995 }, { "epoch": 0.6684506288466685, "grad_norm": 25.974143981933594, "learning_rate": 3.315493711533315e-07, "loss": 0.4601, "step": 4996 }, { "epoch": 0.6685844260101685, "grad_norm": 30.19951629638672, "learning_rate": 3.314155739898314e-07, "loss": 0.4616, "step": 4997 }, { "epoch": 0.6687182231736687, "grad_norm": 37.64552307128906, "learning_rate": 3.3128177682633127e-07, "loss": 0.3715, "step": 4998 }, { "epoch": 0.6688520203371688, "grad_norm": 19.54494857788086, "learning_rate": 3.3114797966283115e-07, "loss": 0.4355, "step": 4999 }, { "epoch": 0.668985817500669, "grad_norm": 22.01133155822754, "learning_rate": 3.31014182499331e-07, "loss": 0.3581, "step": 5000 }, { "epoch": 0.6691196146641691, "grad_norm": 30.11725616455078, "learning_rate": 3.3088038533583086e-07, "loss": 0.5036, "step": 5001 }, { "epoch": 0.6692534118276693, "grad_norm": 22.847570419311523, "learning_rate": 3.3074658817233074e-07, "loss": 0.3478, "step": 5002 }, { "epoch": 0.6693872089911694, "grad_norm": 35.66350173950195, "learning_rate": 3.306127910088306e-07, "loss": 0.4208, "step": 5003 }, { "epoch": 0.6695210061546695, "grad_norm": 28.35469627380371, "learning_rate": 3.3047899384533044e-07, "loss": 0.3145, "step": 5004 }, { "epoch": 0.6696548033181696, "grad_norm": 23.20623016357422, "learning_rate": 3.303451966818303e-07, "loss": 0.3392, "step": 5005 }, { "epoch": 0.6697886004816698, "grad_norm": 22.120817184448242, "learning_rate": 3.302113995183302e-07, "loss": 0.3106, "step": 5006 }, { "epoch": 0.6699223976451699, "grad_norm": 24.720767974853516, "learning_rate": 3.300776023548301e-07, "loss": 0.2619, "step": 5007 }, { "epoch": 0.6700561948086701, "grad_norm": 40.966224670410156, "learning_rate": 3.299438051913299e-07, "loss": 0.4275, "step": 5008 }, { "epoch": 0.6701899919721702, "grad_norm": 18.09941864013672, "learning_rate": 3.2981000802782984e-07, "loss": 0.3741, "step": 5009 }, { "epoch": 0.6703237891356704, "grad_norm": 11.805195808410645, "learning_rate": 3.2967621086432967e-07, "loss": 0.1535, "step": 5010 }, { "epoch": 0.6704575862991704, "grad_norm": 54.23879623413086, "learning_rate": 3.2954241370082954e-07, "loss": 0.7183, "step": 5011 }, { "epoch": 0.6705913834626706, "grad_norm": 27.40304183959961, "learning_rate": 3.2940861653732937e-07, "loss": 0.3836, "step": 5012 }, { "epoch": 0.6707251806261707, "grad_norm": 61.7020149230957, "learning_rate": 3.292748193738293e-07, "loss": 0.6948, "step": 5013 }, { "epoch": 0.6708589777896709, "grad_norm": 24.784875869750977, "learning_rate": 3.2914102221032913e-07, "loss": 0.4618, "step": 5014 }, { "epoch": 0.670992774953171, "grad_norm": 26.764419555664062, "learning_rate": 3.29007225046829e-07, "loss": 0.3514, "step": 5015 }, { "epoch": 0.6711265721166711, "grad_norm": 25.129926681518555, "learning_rate": 3.2887342788332884e-07, "loss": 0.3685, "step": 5016 }, { "epoch": 0.6712603692801713, "grad_norm": 32.80125045776367, "learning_rate": 3.2873963071982877e-07, "loss": 0.4693, "step": 5017 }, { "epoch": 0.6713941664436714, "grad_norm": 38.89385223388672, "learning_rate": 3.286058335563286e-07, "loss": 0.4814, "step": 5018 }, { "epoch": 0.6715279636071715, "grad_norm": 28.859861373901367, "learning_rate": 3.284720363928284e-07, "loss": 0.446, "step": 5019 }, { "epoch": 0.6716617607706716, "grad_norm": 23.377864837646484, "learning_rate": 3.2833823922932835e-07, "loss": 0.4653, "step": 5020 }, { "epoch": 0.6717955579341718, "grad_norm": 20.865299224853516, "learning_rate": 3.282044420658282e-07, "loss": 0.3468, "step": 5021 }, { "epoch": 0.6719293550976719, "grad_norm": 40.04106521606445, "learning_rate": 3.2807064490232806e-07, "loss": 0.6021, "step": 5022 }, { "epoch": 0.6720631522611721, "grad_norm": 41.29440689086914, "learning_rate": 3.279368477388279e-07, "loss": 0.4049, "step": 5023 }, { "epoch": 0.6721969494246722, "grad_norm": 34.64349365234375, "learning_rate": 3.278030505753278e-07, "loss": 0.522, "step": 5024 }, { "epoch": 0.6723307465881724, "grad_norm": 29.73375129699707, "learning_rate": 3.2766925341182764e-07, "loss": 0.4594, "step": 5025 }, { "epoch": 0.6724645437516724, "grad_norm": 23.544620513916016, "learning_rate": 3.275354562483275e-07, "loss": 0.2374, "step": 5026 }, { "epoch": 0.6725983409151726, "grad_norm": 26.91254997253418, "learning_rate": 3.2740165908482735e-07, "loss": 0.2719, "step": 5027 }, { "epoch": 0.6727321380786727, "grad_norm": 21.446935653686523, "learning_rate": 3.272678619213273e-07, "loss": 0.4088, "step": 5028 }, { "epoch": 0.6728659352421729, "grad_norm": 38.26571273803711, "learning_rate": 3.271340647578271e-07, "loss": 0.5942, "step": 5029 }, { "epoch": 0.672999732405673, "grad_norm": 54.621742248535156, "learning_rate": 3.27000267594327e-07, "loss": 0.4324, "step": 5030 }, { "epoch": 0.6731335295691732, "grad_norm": 20.384212493896484, "learning_rate": 3.268664704308268e-07, "loss": 0.4094, "step": 5031 }, { "epoch": 0.6732673267326733, "grad_norm": 27.77450180053711, "learning_rate": 3.2673267326732674e-07, "loss": 0.5041, "step": 5032 }, { "epoch": 0.6734011238961733, "grad_norm": 19.03691864013672, "learning_rate": 3.2659887610382657e-07, "loss": 0.3907, "step": 5033 }, { "epoch": 0.6735349210596735, "grad_norm": 23.287582397460938, "learning_rate": 3.2646507894032645e-07, "loss": 0.5202, "step": 5034 }, { "epoch": 0.6736687182231736, "grad_norm": 38.04383087158203, "learning_rate": 3.2633128177682633e-07, "loss": 0.2705, "step": 5035 }, { "epoch": 0.6738025153866738, "grad_norm": 28.845703125, "learning_rate": 3.261974846133262e-07, "loss": 0.3838, "step": 5036 }, { "epoch": 0.6739363125501739, "grad_norm": 29.053991317749023, "learning_rate": 3.2606368744982604e-07, "loss": 0.4185, "step": 5037 }, { "epoch": 0.6740701097136741, "grad_norm": 35.272830963134766, "learning_rate": 3.259298902863259e-07, "loss": 0.6397, "step": 5038 }, { "epoch": 0.6742039068771742, "grad_norm": 30.72479248046875, "learning_rate": 3.257960931228258e-07, "loss": 0.4697, "step": 5039 }, { "epoch": 0.6743377040406744, "grad_norm": 21.062074661254883, "learning_rate": 3.2566229595932567e-07, "loss": 0.4214, "step": 5040 }, { "epoch": 0.6744715012041744, "grad_norm": 25.7012996673584, "learning_rate": 3.255284987958255e-07, "loss": 0.5347, "step": 5041 }, { "epoch": 0.6746052983676746, "grad_norm": 36.53424835205078, "learning_rate": 3.253947016323254e-07, "loss": 0.4332, "step": 5042 }, { "epoch": 0.6747390955311747, "grad_norm": 23.270530700683594, "learning_rate": 3.2526090446882526e-07, "loss": 0.3122, "step": 5043 }, { "epoch": 0.6748728926946749, "grad_norm": 30.613235473632812, "learning_rate": 3.251271073053251e-07, "loss": 0.3595, "step": 5044 }, { "epoch": 0.675006689858175, "grad_norm": 24.34284019470215, "learning_rate": 3.2499331014182496e-07, "loss": 0.5009, "step": 5045 }, { "epoch": 0.6751404870216752, "grad_norm": 14.273796081542969, "learning_rate": 3.2485951297832484e-07, "loss": 0.2151, "step": 5046 }, { "epoch": 0.6752742841851753, "grad_norm": 30.12422752380371, "learning_rate": 3.247257158148247e-07, "loss": 0.2765, "step": 5047 }, { "epoch": 0.6754080813486754, "grad_norm": 30.62480354309082, "learning_rate": 3.2459191865132455e-07, "loss": 0.3647, "step": 5048 }, { "epoch": 0.6755418785121755, "grad_norm": 30.802196502685547, "learning_rate": 3.2445812148782443e-07, "loss": 0.5034, "step": 5049 }, { "epoch": 0.6756756756756757, "grad_norm": 25.87616729736328, "learning_rate": 3.243243243243243e-07, "loss": 0.4853, "step": 5050 }, { "epoch": 0.6758094728391758, "grad_norm": 25.45184898376465, "learning_rate": 3.241905271608242e-07, "loss": 0.3786, "step": 5051 }, { "epoch": 0.675943270002676, "grad_norm": 52.05412292480469, "learning_rate": 3.24056729997324e-07, "loss": 0.5864, "step": 5052 }, { "epoch": 0.6760770671661761, "grad_norm": 52.01383972167969, "learning_rate": 3.239229328338239e-07, "loss": 0.3913, "step": 5053 }, { "epoch": 0.6762108643296763, "grad_norm": 62.603179931640625, "learning_rate": 3.2378913567032377e-07, "loss": 0.6247, "step": 5054 }, { "epoch": 0.6763446614931763, "grad_norm": 25.588685989379883, "learning_rate": 3.2365533850682365e-07, "loss": 0.4486, "step": 5055 }, { "epoch": 0.6764784586566764, "grad_norm": 39.68572235107422, "learning_rate": 3.235215413433235e-07, "loss": 0.3555, "step": 5056 }, { "epoch": 0.6766122558201766, "grad_norm": 28.308551788330078, "learning_rate": 3.233877441798234e-07, "loss": 0.5743, "step": 5057 }, { "epoch": 0.6767460529836767, "grad_norm": 30.845378875732422, "learning_rate": 3.2325394701632324e-07, "loss": 0.6234, "step": 5058 }, { "epoch": 0.6768798501471769, "grad_norm": 19.28206443786621, "learning_rate": 3.231201498528231e-07, "loss": 0.3954, "step": 5059 }, { "epoch": 0.677013647310677, "grad_norm": 36.27632141113281, "learning_rate": 3.2298635268932294e-07, "loss": 0.3409, "step": 5060 }, { "epoch": 0.6771474444741772, "grad_norm": 28.350923538208008, "learning_rate": 3.2285255552582287e-07, "loss": 0.5173, "step": 5061 }, { "epoch": 0.6772812416376773, "grad_norm": 39.0750732421875, "learning_rate": 3.227187583623227e-07, "loss": 0.5745, "step": 5062 }, { "epoch": 0.6774150388011774, "grad_norm": 22.327678680419922, "learning_rate": 3.225849611988226e-07, "loss": 0.288, "step": 5063 }, { "epoch": 0.6775488359646775, "grad_norm": 25.40116310119629, "learning_rate": 3.224511640353224e-07, "loss": 0.4472, "step": 5064 }, { "epoch": 0.6776826331281777, "grad_norm": 25.858705520629883, "learning_rate": 3.2231736687182234e-07, "loss": 0.3044, "step": 5065 }, { "epoch": 0.6778164302916778, "grad_norm": 30.93669891357422, "learning_rate": 3.2218356970832216e-07, "loss": 0.2493, "step": 5066 }, { "epoch": 0.677950227455178, "grad_norm": 24.1177978515625, "learning_rate": 3.2204977254482204e-07, "loss": 0.3933, "step": 5067 }, { "epoch": 0.6780840246186781, "grad_norm": 28.784584045410156, "learning_rate": 3.219159753813219e-07, "loss": 0.3682, "step": 5068 }, { "epoch": 0.6782178217821783, "grad_norm": 27.117685317993164, "learning_rate": 3.217821782178218e-07, "loss": 0.2403, "step": 5069 }, { "epoch": 0.6783516189456783, "grad_norm": 43.83497619628906, "learning_rate": 3.2164838105432163e-07, "loss": 0.302, "step": 5070 }, { "epoch": 0.6784854161091785, "grad_norm": 46.13308334350586, "learning_rate": 3.2151458389082145e-07, "loss": 0.5565, "step": 5071 }, { "epoch": 0.6786192132726786, "grad_norm": 26.11301612854004, "learning_rate": 3.213807867273214e-07, "loss": 0.4145, "step": 5072 }, { "epoch": 0.6787530104361787, "grad_norm": 25.268024444580078, "learning_rate": 3.212469895638212e-07, "loss": 0.443, "step": 5073 }, { "epoch": 0.6788868075996789, "grad_norm": 28.10826301574707, "learning_rate": 3.211131924003211e-07, "loss": 0.5317, "step": 5074 }, { "epoch": 0.679020604763179, "grad_norm": 28.88863754272461, "learning_rate": 3.209793952368209e-07, "loss": 0.3006, "step": 5075 }, { "epoch": 0.6791544019266792, "grad_norm": 27.23813819885254, "learning_rate": 3.2084559807332085e-07, "loss": 0.3888, "step": 5076 }, { "epoch": 0.6792881990901792, "grad_norm": 29.910181045532227, "learning_rate": 3.207118009098207e-07, "loss": 0.5619, "step": 5077 }, { "epoch": 0.6794219962536794, "grad_norm": 22.407045364379883, "learning_rate": 3.2057800374632056e-07, "loss": 0.2096, "step": 5078 }, { "epoch": 0.6795557934171795, "grad_norm": 30.94254493713379, "learning_rate": 3.2044420658282044e-07, "loss": 0.3703, "step": 5079 }, { "epoch": 0.6796895905806797, "grad_norm": 35.005393981933594, "learning_rate": 3.203104094193203e-07, "loss": 0.4131, "step": 5080 }, { "epoch": 0.6798233877441798, "grad_norm": 31.300628662109375, "learning_rate": 3.2017661225582014e-07, "loss": 0.5503, "step": 5081 }, { "epoch": 0.67995718490768, "grad_norm": 20.202213287353516, "learning_rate": 3.2004281509232e-07, "loss": 0.2785, "step": 5082 }, { "epoch": 0.6800909820711801, "grad_norm": 29.86245346069336, "learning_rate": 3.199090179288199e-07, "loss": 0.3989, "step": 5083 }, { "epoch": 0.6802247792346803, "grad_norm": 24.12659454345703, "learning_rate": 3.197752207653198e-07, "loss": 0.4347, "step": 5084 }, { "epoch": 0.6803585763981803, "grad_norm": 26.163249969482422, "learning_rate": 3.196414236018196e-07, "loss": 0.3939, "step": 5085 }, { "epoch": 0.6804923735616805, "grad_norm": 44.168460845947266, "learning_rate": 3.195076264383195e-07, "loss": 0.5582, "step": 5086 }, { "epoch": 0.6806261707251806, "grad_norm": 26.75056266784668, "learning_rate": 3.1937382927481936e-07, "loss": 0.3206, "step": 5087 }, { "epoch": 0.6807599678886808, "grad_norm": 31.996082305908203, "learning_rate": 3.1924003211131924e-07, "loss": 0.2835, "step": 5088 }, { "epoch": 0.6808937650521809, "grad_norm": 22.978103637695312, "learning_rate": 3.1910623494781907e-07, "loss": 0.4274, "step": 5089 }, { "epoch": 0.6810275622156811, "grad_norm": 59.09239959716797, "learning_rate": 3.18972437784319e-07, "loss": 0.274, "step": 5090 }, { "epoch": 0.6811613593791812, "grad_norm": 23.996877670288086, "learning_rate": 3.1883864062081883e-07, "loss": 0.2762, "step": 5091 }, { "epoch": 0.6812951565426812, "grad_norm": 35.170494079589844, "learning_rate": 3.187048434573187e-07, "loss": 0.579, "step": 5092 }, { "epoch": 0.6814289537061814, "grad_norm": 16.98056411743164, "learning_rate": 3.1857104629381853e-07, "loss": 0.3282, "step": 5093 }, { "epoch": 0.6815627508696815, "grad_norm": 22.692962646484375, "learning_rate": 3.1843724913031847e-07, "loss": 0.3597, "step": 5094 }, { "epoch": 0.6816965480331817, "grad_norm": 24.384906768798828, "learning_rate": 3.183034519668183e-07, "loss": 0.3776, "step": 5095 }, { "epoch": 0.6818303451966818, "grad_norm": 20.83888053894043, "learning_rate": 3.181696548033181e-07, "loss": 0.3325, "step": 5096 }, { "epoch": 0.681964142360182, "grad_norm": 21.15289878845215, "learning_rate": 3.18035857639818e-07, "loss": 0.3369, "step": 5097 }, { "epoch": 0.6820979395236821, "grad_norm": 22.944150924682617, "learning_rate": 3.179020604763179e-07, "loss": 0.3512, "step": 5098 }, { "epoch": 0.6822317366871822, "grad_norm": 50.698211669921875, "learning_rate": 3.1776826331281776e-07, "loss": 0.731, "step": 5099 }, { "epoch": 0.6823655338506823, "grad_norm": 25.737518310546875, "learning_rate": 3.176344661493176e-07, "loss": 0.4428, "step": 5100 }, { "epoch": 0.6824993310141825, "grad_norm": 31.083444595336914, "learning_rate": 3.175006689858175e-07, "loss": 0.4522, "step": 5101 }, { "epoch": 0.6826331281776826, "grad_norm": 21.996658325195312, "learning_rate": 3.1736687182231734e-07, "loss": 0.3801, "step": 5102 }, { "epoch": 0.6827669253411828, "grad_norm": 36.685176849365234, "learning_rate": 3.172330746588172e-07, "loss": 0.3973, "step": 5103 }, { "epoch": 0.6829007225046829, "grad_norm": 18.346323013305664, "learning_rate": 3.1709927749531705e-07, "loss": 0.3674, "step": 5104 }, { "epoch": 0.6830345196681831, "grad_norm": 34.863704681396484, "learning_rate": 3.16965480331817e-07, "loss": 0.264, "step": 5105 }, { "epoch": 0.6831683168316832, "grad_norm": 21.98691749572754, "learning_rate": 3.168316831683168e-07, "loss": 0.4435, "step": 5106 }, { "epoch": 0.6833021139951833, "grad_norm": 39.01151657104492, "learning_rate": 3.166978860048167e-07, "loss": 0.5642, "step": 5107 }, { "epoch": 0.6834359111586834, "grad_norm": 32.89845657348633, "learning_rate": 3.165640888413165e-07, "loss": 0.3148, "step": 5108 }, { "epoch": 0.6835697083221836, "grad_norm": 43.91237258911133, "learning_rate": 3.1643029167781644e-07, "loss": 0.358, "step": 5109 }, { "epoch": 0.6837035054856837, "grad_norm": 19.589035034179688, "learning_rate": 3.1629649451431627e-07, "loss": 0.3149, "step": 5110 }, { "epoch": 0.6838373026491839, "grad_norm": 27.634807586669922, "learning_rate": 3.1616269735081615e-07, "loss": 0.3238, "step": 5111 }, { "epoch": 0.683971099812684, "grad_norm": 26.095531463623047, "learning_rate": 3.1602890018731603e-07, "loss": 0.3518, "step": 5112 }, { "epoch": 0.6841048969761842, "grad_norm": 46.37057876586914, "learning_rate": 3.158951030238159e-07, "loss": 0.4631, "step": 5113 }, { "epoch": 0.6842386941396842, "grad_norm": 30.36549186706543, "learning_rate": 3.1576130586031573e-07, "loss": 0.6144, "step": 5114 }, { "epoch": 0.6843724913031843, "grad_norm": 28.954675674438477, "learning_rate": 3.156275086968156e-07, "loss": 0.3665, "step": 5115 }, { "epoch": 0.6845062884666845, "grad_norm": 21.83670425415039, "learning_rate": 3.154937115333155e-07, "loss": 0.2902, "step": 5116 }, { "epoch": 0.6846400856301846, "grad_norm": 21.823259353637695, "learning_rate": 3.1535991436981537e-07, "loss": 0.2873, "step": 5117 }, { "epoch": 0.6847738827936848, "grad_norm": 17.746374130249023, "learning_rate": 3.152261172063152e-07, "loss": 0.3211, "step": 5118 }, { "epoch": 0.6849076799571849, "grad_norm": 16.620132446289062, "learning_rate": 3.150923200428151e-07, "loss": 0.1409, "step": 5119 }, { "epoch": 0.6850414771206851, "grad_norm": 31.63002586364746, "learning_rate": 3.1495852287931496e-07, "loss": 0.3788, "step": 5120 }, { "epoch": 0.6851752742841851, "grad_norm": 58.72029495239258, "learning_rate": 3.1482472571581484e-07, "loss": 0.7019, "step": 5121 }, { "epoch": 0.6853090714476853, "grad_norm": 47.84846496582031, "learning_rate": 3.1469092855231466e-07, "loss": 0.718, "step": 5122 }, { "epoch": 0.6854428686111854, "grad_norm": 23.96718978881836, "learning_rate": 3.145571313888146e-07, "loss": 0.4056, "step": 5123 }, { "epoch": 0.6855766657746856, "grad_norm": 32.65205764770508, "learning_rate": 3.144233342253144e-07, "loss": 0.3168, "step": 5124 }, { "epoch": 0.6857104629381857, "grad_norm": 22.610370635986328, "learning_rate": 3.1428953706181425e-07, "loss": 0.2974, "step": 5125 }, { "epoch": 0.6858442601016859, "grad_norm": 19.009567260742188, "learning_rate": 3.141557398983141e-07, "loss": 0.3028, "step": 5126 }, { "epoch": 0.685978057265186, "grad_norm": 33.26577377319336, "learning_rate": 3.14021942734814e-07, "loss": 0.5065, "step": 5127 }, { "epoch": 0.6861118544286862, "grad_norm": 22.81492042541504, "learning_rate": 3.138881455713139e-07, "loss": 0.3059, "step": 5128 }, { "epoch": 0.6862456515921862, "grad_norm": 20.938329696655273, "learning_rate": 3.137543484078137e-07, "loss": 0.3843, "step": 5129 }, { "epoch": 0.6863794487556863, "grad_norm": 26.494901657104492, "learning_rate": 3.136205512443136e-07, "loss": 0.4329, "step": 5130 }, { "epoch": 0.6865132459191865, "grad_norm": 34.253170013427734, "learning_rate": 3.1348675408081347e-07, "loss": 0.425, "step": 5131 }, { "epoch": 0.6866470430826866, "grad_norm": 26.306285858154297, "learning_rate": 3.1335295691731335e-07, "loss": 0.368, "step": 5132 }, { "epoch": 0.6867808402461868, "grad_norm": 21.25092315673828, "learning_rate": 3.132191597538132e-07, "loss": 0.1419, "step": 5133 }, { "epoch": 0.686914637409687, "grad_norm": 28.239641189575195, "learning_rate": 3.130853625903131e-07, "loss": 0.3877, "step": 5134 }, { "epoch": 0.6870484345731871, "grad_norm": 18.40943145751953, "learning_rate": 3.1295156542681293e-07, "loss": 0.2879, "step": 5135 }, { "epoch": 0.6871822317366871, "grad_norm": 42.05695724487305, "learning_rate": 3.128177682633128e-07, "loss": 0.4299, "step": 5136 }, { "epoch": 0.6873160289001873, "grad_norm": 15.378443717956543, "learning_rate": 3.1268397109981264e-07, "loss": 0.3065, "step": 5137 }, { "epoch": 0.6874498260636874, "grad_norm": 24.072763442993164, "learning_rate": 3.1255017393631257e-07, "loss": 0.3096, "step": 5138 }, { "epoch": 0.6875836232271876, "grad_norm": 22.018835067749023, "learning_rate": 3.124163767728124e-07, "loss": 0.3784, "step": 5139 }, { "epoch": 0.6877174203906877, "grad_norm": 22.384796142578125, "learning_rate": 3.122825796093123e-07, "loss": 0.2804, "step": 5140 }, { "epoch": 0.6878512175541879, "grad_norm": 35.52460861206055, "learning_rate": 3.121487824458121e-07, "loss": 0.5208, "step": 5141 }, { "epoch": 0.687985014717688, "grad_norm": 54.25371170043945, "learning_rate": 3.1201498528231203e-07, "loss": 0.6195, "step": 5142 }, { "epoch": 0.6881188118811881, "grad_norm": 29.489700317382812, "learning_rate": 3.1188118811881186e-07, "loss": 0.4369, "step": 5143 }, { "epoch": 0.6882526090446882, "grad_norm": 24.233415603637695, "learning_rate": 3.1174739095531174e-07, "loss": 0.3718, "step": 5144 }, { "epoch": 0.6883864062081884, "grad_norm": 20.563671112060547, "learning_rate": 3.116135937918116e-07, "loss": 0.3075, "step": 5145 }, { "epoch": 0.6885202033716885, "grad_norm": 20.748685836791992, "learning_rate": 3.114797966283115e-07, "loss": 0.2109, "step": 5146 }, { "epoch": 0.6886540005351887, "grad_norm": 27.547969818115234, "learning_rate": 3.113459994648113e-07, "loss": 0.3983, "step": 5147 }, { "epoch": 0.6887877976986888, "grad_norm": 37.66606521606445, "learning_rate": 3.1121220230131115e-07, "loss": 0.557, "step": 5148 }, { "epoch": 0.688921594862189, "grad_norm": 31.512855529785156, "learning_rate": 3.110784051378111e-07, "loss": 0.3668, "step": 5149 }, { "epoch": 0.6890553920256891, "grad_norm": 27.514564514160156, "learning_rate": 3.109446079743109e-07, "loss": 0.439, "step": 5150 }, { "epoch": 0.6891891891891891, "grad_norm": 32.42329788208008, "learning_rate": 3.108108108108108e-07, "loss": 0.3603, "step": 5151 }, { "epoch": 0.6893229863526893, "grad_norm": 24.10565185546875, "learning_rate": 3.106770136473106e-07, "loss": 0.3323, "step": 5152 }, { "epoch": 0.6894567835161894, "grad_norm": 18.385177612304688, "learning_rate": 3.1054321648381055e-07, "loss": 0.271, "step": 5153 }, { "epoch": 0.6895905806796896, "grad_norm": 25.8032169342041, "learning_rate": 3.104094193203104e-07, "loss": 0.3417, "step": 5154 }, { "epoch": 0.6897243778431897, "grad_norm": 36.19701385498047, "learning_rate": 3.1027562215681025e-07, "loss": 0.3915, "step": 5155 }, { "epoch": 0.6898581750066899, "grad_norm": 30.467451095581055, "learning_rate": 3.1014182499331013e-07, "loss": 0.419, "step": 5156 }, { "epoch": 0.68999197217019, "grad_norm": 25.49123191833496, "learning_rate": 3.1000802782981e-07, "loss": 0.3119, "step": 5157 }, { "epoch": 0.6901257693336901, "grad_norm": 34.91781234741211, "learning_rate": 3.0987423066630984e-07, "loss": 0.3332, "step": 5158 }, { "epoch": 0.6902595664971902, "grad_norm": 44.735572814941406, "learning_rate": 3.097404335028097e-07, "loss": 0.4174, "step": 5159 }, { "epoch": 0.6903933636606904, "grad_norm": 27.556819915771484, "learning_rate": 3.096066363393096e-07, "loss": 0.4423, "step": 5160 }, { "epoch": 0.6905271608241905, "grad_norm": 26.208030700683594, "learning_rate": 3.094728391758095e-07, "loss": 0.2755, "step": 5161 }, { "epoch": 0.6906609579876907, "grad_norm": 23.56456756591797, "learning_rate": 3.093390420123093e-07, "loss": 0.3445, "step": 5162 }, { "epoch": 0.6907947551511908, "grad_norm": 20.329145431518555, "learning_rate": 3.092052448488092e-07, "loss": 0.335, "step": 5163 }, { "epoch": 0.690928552314691, "grad_norm": 27.241670608520508, "learning_rate": 3.0907144768530906e-07, "loss": 0.2799, "step": 5164 }, { "epoch": 0.691062349478191, "grad_norm": 26.168020248413086, "learning_rate": 3.0893765052180894e-07, "loss": 0.2988, "step": 5165 }, { "epoch": 0.6911961466416912, "grad_norm": 30.007930755615234, "learning_rate": 3.0880385335830877e-07, "loss": 0.2364, "step": 5166 }, { "epoch": 0.6913299438051913, "grad_norm": 21.849979400634766, "learning_rate": 3.086700561948087e-07, "loss": 0.345, "step": 5167 }, { "epoch": 0.6914637409686915, "grad_norm": 33.14085388183594, "learning_rate": 3.085362590313085e-07, "loss": 0.3745, "step": 5168 }, { "epoch": 0.6915975381321916, "grad_norm": 32.05314636230469, "learning_rate": 3.084024618678084e-07, "loss": 0.3905, "step": 5169 }, { "epoch": 0.6917313352956918, "grad_norm": 49.963687896728516, "learning_rate": 3.0826866470430823e-07, "loss": 0.4899, "step": 5170 }, { "epoch": 0.6918651324591919, "grad_norm": 42.48655319213867, "learning_rate": 3.0813486754080816e-07, "loss": 0.416, "step": 5171 }, { "epoch": 0.691998929622692, "grad_norm": 44.78777313232422, "learning_rate": 3.08001070377308e-07, "loss": 0.3511, "step": 5172 }, { "epoch": 0.6921327267861921, "grad_norm": 30.754451751708984, "learning_rate": 3.0786727321380787e-07, "loss": 0.3467, "step": 5173 }, { "epoch": 0.6922665239496922, "grad_norm": 20.670536041259766, "learning_rate": 3.077334760503077e-07, "loss": 0.2994, "step": 5174 }, { "epoch": 0.6924003211131924, "grad_norm": 31.585853576660156, "learning_rate": 3.0759967888680763e-07, "loss": 0.4043, "step": 5175 }, { "epoch": 0.6925341182766925, "grad_norm": 36.03487777709961, "learning_rate": 3.0746588172330745e-07, "loss": 0.3525, "step": 5176 }, { "epoch": 0.6926679154401927, "grad_norm": 18.67669105529785, "learning_rate": 3.073320845598073e-07, "loss": 0.2461, "step": 5177 }, { "epoch": 0.6928017126036928, "grad_norm": 25.705631256103516, "learning_rate": 3.071982873963072e-07, "loss": 0.3651, "step": 5178 }, { "epoch": 0.692935509767193, "grad_norm": 33.249855041503906, "learning_rate": 3.0706449023280704e-07, "loss": 0.2942, "step": 5179 }, { "epoch": 0.693069306930693, "grad_norm": 33.78895568847656, "learning_rate": 3.069306930693069e-07, "loss": 0.3836, "step": 5180 }, { "epoch": 0.6932031040941932, "grad_norm": 48.43708038330078, "learning_rate": 3.0679689590580674e-07, "loss": 0.4844, "step": 5181 }, { "epoch": 0.6933369012576933, "grad_norm": 32.388309478759766, "learning_rate": 3.066630987423067e-07, "loss": 0.4693, "step": 5182 }, { "epoch": 0.6934706984211935, "grad_norm": 33.02864074707031, "learning_rate": 3.065293015788065e-07, "loss": 0.5089, "step": 5183 }, { "epoch": 0.6936044955846936, "grad_norm": 36.03180694580078, "learning_rate": 3.063955044153064e-07, "loss": 0.5653, "step": 5184 }, { "epoch": 0.6937382927481938, "grad_norm": 37.412384033203125, "learning_rate": 3.062617072518062e-07, "loss": 0.3909, "step": 5185 }, { "epoch": 0.6938720899116939, "grad_norm": 31.790729522705078, "learning_rate": 3.0612791008830614e-07, "loss": 0.3687, "step": 5186 }, { "epoch": 0.6940058870751941, "grad_norm": 52.85772705078125, "learning_rate": 3.0599411292480597e-07, "loss": 0.4527, "step": 5187 }, { "epoch": 0.6941396842386941, "grad_norm": 45.571250915527344, "learning_rate": 3.0586031576130585e-07, "loss": 0.718, "step": 5188 }, { "epoch": 0.6942734814021942, "grad_norm": 17.656877517700195, "learning_rate": 3.057265185978057e-07, "loss": 0.2917, "step": 5189 }, { "epoch": 0.6944072785656944, "grad_norm": 25.162105560302734, "learning_rate": 3.055927214343056e-07, "loss": 0.4197, "step": 5190 }, { "epoch": 0.6945410757291945, "grad_norm": 42.99433517456055, "learning_rate": 3.0545892427080543e-07, "loss": 0.6607, "step": 5191 }, { "epoch": 0.6946748728926947, "grad_norm": 36.3543586730957, "learning_rate": 3.053251271073053e-07, "loss": 0.4629, "step": 5192 }, { "epoch": 0.6948086700561948, "grad_norm": 32.10483932495117, "learning_rate": 3.051913299438052e-07, "loss": 0.3591, "step": 5193 }, { "epoch": 0.694942467219695, "grad_norm": 39.91219711303711, "learning_rate": 3.0505753278030507e-07, "loss": 0.4429, "step": 5194 }, { "epoch": 0.695076264383195, "grad_norm": 25.905038833618164, "learning_rate": 3.049237356168049e-07, "loss": 0.3371, "step": 5195 }, { "epoch": 0.6952100615466952, "grad_norm": 27.53946876525879, "learning_rate": 3.047899384533048e-07, "loss": 0.3934, "step": 5196 }, { "epoch": 0.6953438587101953, "grad_norm": 36.057559967041016, "learning_rate": 3.0465614128980465e-07, "loss": 0.4291, "step": 5197 }, { "epoch": 0.6954776558736955, "grad_norm": 26.78577995300293, "learning_rate": 3.0452234412630453e-07, "loss": 0.2961, "step": 5198 }, { "epoch": 0.6956114530371956, "grad_norm": 33.99374008178711, "learning_rate": 3.0438854696280436e-07, "loss": 0.4425, "step": 5199 }, { "epoch": 0.6957452502006958, "grad_norm": 28.595518112182617, "learning_rate": 3.042547497993043e-07, "loss": 0.2204, "step": 5200 }, { "epoch": 0.6958790473641959, "grad_norm": 21.287357330322266, "learning_rate": 3.041209526358041e-07, "loss": 0.3023, "step": 5201 }, { "epoch": 0.696012844527696, "grad_norm": 31.051645278930664, "learning_rate": 3.0398715547230394e-07, "loss": 0.2807, "step": 5202 }, { "epoch": 0.6961466416911961, "grad_norm": 41.695945739746094, "learning_rate": 3.038533583088038e-07, "loss": 0.407, "step": 5203 }, { "epoch": 0.6962804388546963, "grad_norm": 24.694894790649414, "learning_rate": 3.037195611453037e-07, "loss": 0.2928, "step": 5204 }, { "epoch": 0.6964142360181964, "grad_norm": 48.11061477661133, "learning_rate": 3.035857639818036e-07, "loss": 0.6698, "step": 5205 }, { "epoch": 0.6965480331816966, "grad_norm": 25.545482635498047, "learning_rate": 3.034519668183034e-07, "loss": 0.299, "step": 5206 }, { "epoch": 0.6966818303451967, "grad_norm": 26.252763748168945, "learning_rate": 3.033181696548033e-07, "loss": 0.4056, "step": 5207 }, { "epoch": 0.6968156275086969, "grad_norm": 43.76481628417969, "learning_rate": 3.0318437249130317e-07, "loss": 0.6523, "step": 5208 }, { "epoch": 0.696949424672197, "grad_norm": 20.04449462890625, "learning_rate": 3.0305057532780305e-07, "loss": 0.295, "step": 5209 }, { "epoch": 0.697083221835697, "grad_norm": 40.98612594604492, "learning_rate": 3.0291677816430287e-07, "loss": 0.4544, "step": 5210 }, { "epoch": 0.6972170189991972, "grad_norm": 25.217397689819336, "learning_rate": 3.027829810008028e-07, "loss": 0.3939, "step": 5211 }, { "epoch": 0.6973508161626973, "grad_norm": 35.74287796020508, "learning_rate": 3.0264918383730263e-07, "loss": 0.4806, "step": 5212 }, { "epoch": 0.6974846133261975, "grad_norm": 39.730133056640625, "learning_rate": 3.025153866738025e-07, "loss": 0.4877, "step": 5213 }, { "epoch": 0.6976184104896976, "grad_norm": 22.4663028717041, "learning_rate": 3.0238158951030234e-07, "loss": 0.2736, "step": 5214 }, { "epoch": 0.6977522076531978, "grad_norm": 35.397483825683594, "learning_rate": 3.0224779234680227e-07, "loss": 0.2976, "step": 5215 }, { "epoch": 0.6978860048166979, "grad_norm": 20.60930633544922, "learning_rate": 3.021139951833021e-07, "loss": 0.2952, "step": 5216 }, { "epoch": 0.698019801980198, "grad_norm": 44.6405029296875, "learning_rate": 3.01980198019802e-07, "loss": 0.352, "step": 5217 }, { "epoch": 0.6981535991436981, "grad_norm": 15.422455787658691, "learning_rate": 3.018464008563018e-07, "loss": 0.2098, "step": 5218 }, { "epoch": 0.6982873963071983, "grad_norm": 66.3851089477539, "learning_rate": 3.0171260369280173e-07, "loss": 0.4862, "step": 5219 }, { "epoch": 0.6984211934706984, "grad_norm": 28.327251434326172, "learning_rate": 3.0157880652930156e-07, "loss": 0.3384, "step": 5220 }, { "epoch": 0.6985549906341986, "grad_norm": 20.889450073242188, "learning_rate": 3.0144500936580144e-07, "loss": 0.4055, "step": 5221 }, { "epoch": 0.6986887877976987, "grad_norm": 31.586183547973633, "learning_rate": 3.013112122023013e-07, "loss": 0.3438, "step": 5222 }, { "epoch": 0.6988225849611989, "grad_norm": 31.12088394165039, "learning_rate": 3.011774150388012e-07, "loss": 0.4729, "step": 5223 }, { "epoch": 0.6989563821246989, "grad_norm": 25.037464141845703, "learning_rate": 3.01043617875301e-07, "loss": 0.2765, "step": 5224 }, { "epoch": 0.699090179288199, "grad_norm": 23.485595703125, "learning_rate": 3.009098207118009e-07, "loss": 0.4019, "step": 5225 }, { "epoch": 0.6992239764516992, "grad_norm": 22.777904510498047, "learning_rate": 3.007760235483008e-07, "loss": 0.4419, "step": 5226 }, { "epoch": 0.6993577736151994, "grad_norm": 34.75687026977539, "learning_rate": 3.006422263848006e-07, "loss": 0.2658, "step": 5227 }, { "epoch": 0.6994915707786995, "grad_norm": 27.12865447998047, "learning_rate": 3.005084292213005e-07, "loss": 0.4431, "step": 5228 }, { "epoch": 0.6996253679421996, "grad_norm": 35.8483772277832, "learning_rate": 3.003746320578003e-07, "loss": 0.5175, "step": 5229 }, { "epoch": 0.6997591651056998, "grad_norm": 27.16912841796875, "learning_rate": 3.0024083489430025e-07, "loss": 0.3054, "step": 5230 }, { "epoch": 0.6998929622692, "grad_norm": 42.96989440917969, "learning_rate": 3.0010703773080007e-07, "loss": 0.4456, "step": 5231 }, { "epoch": 0.7000267594327, "grad_norm": 41.90421676635742, "learning_rate": 2.9997324056729995e-07, "loss": 0.5789, "step": 5232 }, { "epoch": 0.7001605565962001, "grad_norm": 37.87692642211914, "learning_rate": 2.9983944340379983e-07, "loss": 0.3962, "step": 5233 }, { "epoch": 0.7002943537597003, "grad_norm": 40.379459381103516, "learning_rate": 2.997056462402997e-07, "loss": 0.436, "step": 5234 }, { "epoch": 0.7004281509232004, "grad_norm": 42.89056396484375, "learning_rate": 2.9957184907679954e-07, "loss": 0.4844, "step": 5235 }, { "epoch": 0.7005619480867006, "grad_norm": 39.8333854675293, "learning_rate": 2.994380519132994e-07, "loss": 0.6085, "step": 5236 }, { "epoch": 0.7006957452502007, "grad_norm": 28.343217849731445, "learning_rate": 2.993042547497993e-07, "loss": 0.3933, "step": 5237 }, { "epoch": 0.7008295424137009, "grad_norm": 21.09156608581543, "learning_rate": 2.991704575862992e-07, "loss": 0.3124, "step": 5238 }, { "epoch": 0.7009633395772009, "grad_norm": 25.698949813842773, "learning_rate": 2.99036660422799e-07, "loss": 0.2515, "step": 5239 }, { "epoch": 0.7010971367407011, "grad_norm": 11.844663619995117, "learning_rate": 2.989028632592989e-07, "loss": 0.0991, "step": 5240 }, { "epoch": 0.7012309339042012, "grad_norm": 28.896028518676758, "learning_rate": 2.9876906609579876e-07, "loss": 0.376, "step": 5241 }, { "epoch": 0.7013647310677014, "grad_norm": 36.53395462036133, "learning_rate": 2.9863526893229864e-07, "loss": 0.4936, "step": 5242 }, { "epoch": 0.7014985282312015, "grad_norm": 27.002779006958008, "learning_rate": 2.9850147176879846e-07, "loss": 0.2999, "step": 5243 }, { "epoch": 0.7016323253947017, "grad_norm": 27.172618865966797, "learning_rate": 2.983676746052984e-07, "loss": 0.429, "step": 5244 }, { "epoch": 0.7017661225582018, "grad_norm": 25.620079040527344, "learning_rate": 2.982338774417982e-07, "loss": 0.4058, "step": 5245 }, { "epoch": 0.7018999197217018, "grad_norm": 22.669708251953125, "learning_rate": 2.981000802782981e-07, "loss": 0.398, "step": 5246 }, { "epoch": 0.702033716885202, "grad_norm": 23.943140029907227, "learning_rate": 2.9796628311479793e-07, "loss": 0.1643, "step": 5247 }, { "epoch": 0.7021675140487021, "grad_norm": 27.21938705444336, "learning_rate": 2.9783248595129786e-07, "loss": 0.3333, "step": 5248 }, { "epoch": 0.7023013112122023, "grad_norm": 23.57223129272461, "learning_rate": 2.976986887877977e-07, "loss": 0.3873, "step": 5249 }, { "epoch": 0.7024351083757024, "grad_norm": 72.45610046386719, "learning_rate": 2.9756489162429757e-07, "loss": 0.9702, "step": 5250 }, { "epoch": 0.7025689055392026, "grad_norm": 30.36003303527832, "learning_rate": 2.974310944607974e-07, "loss": 0.3903, "step": 5251 }, { "epoch": 0.7027027027027027, "grad_norm": 25.998075485229492, "learning_rate": 2.972972972972973e-07, "loss": 0.3827, "step": 5252 }, { "epoch": 0.7028364998662029, "grad_norm": 19.789718627929688, "learning_rate": 2.9716350013379715e-07, "loss": 0.1967, "step": 5253 }, { "epoch": 0.7029702970297029, "grad_norm": 24.609291076660156, "learning_rate": 2.97029702970297e-07, "loss": 0.3181, "step": 5254 }, { "epoch": 0.7031040941932031, "grad_norm": 33.10213088989258, "learning_rate": 2.968959058067969e-07, "loss": 0.4806, "step": 5255 }, { "epoch": 0.7032378913567032, "grad_norm": 64.59418487548828, "learning_rate": 2.9676210864329674e-07, "loss": 0.5106, "step": 5256 }, { "epoch": 0.7033716885202034, "grad_norm": 26.239456176757812, "learning_rate": 2.966283114797966e-07, "loss": 0.2783, "step": 5257 }, { "epoch": 0.7035054856837035, "grad_norm": 26.10121726989746, "learning_rate": 2.9649451431629644e-07, "loss": 0.3488, "step": 5258 }, { "epoch": 0.7036392828472037, "grad_norm": 30.596580505371094, "learning_rate": 2.963607171527964e-07, "loss": 0.3782, "step": 5259 }, { "epoch": 0.7037730800107038, "grad_norm": 24.226747512817383, "learning_rate": 2.962269199892962e-07, "loss": 0.3291, "step": 5260 }, { "epoch": 0.7039068771742039, "grad_norm": 36.51139831542969, "learning_rate": 2.960931228257961e-07, "loss": 0.4322, "step": 5261 }, { "epoch": 0.704040674337704, "grad_norm": 29.83783721923828, "learning_rate": 2.959593256622959e-07, "loss": 0.4519, "step": 5262 }, { "epoch": 0.7041744715012042, "grad_norm": 27.289602279663086, "learning_rate": 2.9582552849879584e-07, "loss": 0.2329, "step": 5263 }, { "epoch": 0.7043082686647043, "grad_norm": 38.48670959472656, "learning_rate": 2.9569173133529566e-07, "loss": 0.3565, "step": 5264 }, { "epoch": 0.7044420658282045, "grad_norm": 43.495296478271484, "learning_rate": 2.9555793417179554e-07, "loss": 0.5328, "step": 5265 }, { "epoch": 0.7045758629917046, "grad_norm": 21.13562774658203, "learning_rate": 2.954241370082954e-07, "loss": 0.3716, "step": 5266 }, { "epoch": 0.7047096601552048, "grad_norm": 42.742576599121094, "learning_rate": 2.952903398447953e-07, "loss": 0.4283, "step": 5267 }, { "epoch": 0.7048434573187048, "grad_norm": 54.11484146118164, "learning_rate": 2.9515654268129513e-07, "loss": 0.4824, "step": 5268 }, { "epoch": 0.7049772544822049, "grad_norm": 42.88621520996094, "learning_rate": 2.95022745517795e-07, "loss": 0.2685, "step": 5269 }, { "epoch": 0.7051110516457051, "grad_norm": 45.288360595703125, "learning_rate": 2.948889483542949e-07, "loss": 0.454, "step": 5270 }, { "epoch": 0.7052448488092052, "grad_norm": 26.956844329833984, "learning_rate": 2.9475515119079477e-07, "loss": 0.2912, "step": 5271 }, { "epoch": 0.7053786459727054, "grad_norm": 44.4165153503418, "learning_rate": 2.946213540272946e-07, "loss": 0.2472, "step": 5272 }, { "epoch": 0.7055124431362055, "grad_norm": 38.59382247924805, "learning_rate": 2.9448755686379447e-07, "loss": 0.419, "step": 5273 }, { "epoch": 0.7056462402997057, "grad_norm": 34.47614288330078, "learning_rate": 2.9435375970029435e-07, "loss": 0.3076, "step": 5274 }, { "epoch": 0.7057800374632058, "grad_norm": 46.376461029052734, "learning_rate": 2.9421996253679423e-07, "loss": 0.5867, "step": 5275 }, { "epoch": 0.7059138346267059, "grad_norm": 56.21623229980469, "learning_rate": 2.9408616537329406e-07, "loss": 0.5772, "step": 5276 }, { "epoch": 0.706047631790206, "grad_norm": 41.29158020019531, "learning_rate": 2.93952368209794e-07, "loss": 0.4711, "step": 5277 }, { "epoch": 0.7061814289537062, "grad_norm": 28.285751342773438, "learning_rate": 2.938185710462938e-07, "loss": 0.3018, "step": 5278 }, { "epoch": 0.7063152261172063, "grad_norm": 45.942901611328125, "learning_rate": 2.9368477388279364e-07, "loss": 0.5355, "step": 5279 }, { "epoch": 0.7064490232807065, "grad_norm": 20.861528396606445, "learning_rate": 2.935509767192935e-07, "loss": 0.264, "step": 5280 }, { "epoch": 0.7065828204442066, "grad_norm": 17.36398696899414, "learning_rate": 2.934171795557934e-07, "loss": 0.2567, "step": 5281 }, { "epoch": 0.7067166176077068, "grad_norm": 43.45390319824219, "learning_rate": 2.932833823922933e-07, "loss": 0.2617, "step": 5282 }, { "epoch": 0.7068504147712068, "grad_norm": 53.586177825927734, "learning_rate": 2.931495852287931e-07, "loss": 0.5002, "step": 5283 }, { "epoch": 0.706984211934707, "grad_norm": 27.111188888549805, "learning_rate": 2.93015788065293e-07, "loss": 0.4571, "step": 5284 }, { "epoch": 0.7071180090982071, "grad_norm": 72.42371368408203, "learning_rate": 2.9288199090179286e-07, "loss": 0.4918, "step": 5285 }, { "epoch": 0.7072518062617072, "grad_norm": 18.356748580932617, "learning_rate": 2.9274819373829274e-07, "loss": 0.317, "step": 5286 }, { "epoch": 0.7073856034252074, "grad_norm": 42.367881774902344, "learning_rate": 2.9261439657479257e-07, "loss": 0.3875, "step": 5287 }, { "epoch": 0.7075194005887075, "grad_norm": 22.427261352539062, "learning_rate": 2.924805994112925e-07, "loss": 0.2177, "step": 5288 }, { "epoch": 0.7076531977522077, "grad_norm": 41.22882843017578, "learning_rate": 2.9234680224779233e-07, "loss": 0.4038, "step": 5289 }, { "epoch": 0.7077869949157077, "grad_norm": 22.04157829284668, "learning_rate": 2.922130050842922e-07, "loss": 0.229, "step": 5290 }, { "epoch": 0.7079207920792079, "grad_norm": 36.25357437133789, "learning_rate": 2.9207920792079203e-07, "loss": 0.369, "step": 5291 }, { "epoch": 0.708054589242708, "grad_norm": 20.494964599609375, "learning_rate": 2.9194541075729197e-07, "loss": 0.2384, "step": 5292 }, { "epoch": 0.7081883864062082, "grad_norm": 46.32603454589844, "learning_rate": 2.918116135937918e-07, "loss": 0.4784, "step": 5293 }, { "epoch": 0.7083221835697083, "grad_norm": 30.048450469970703, "learning_rate": 2.9167781643029167e-07, "loss": 0.5554, "step": 5294 }, { "epoch": 0.7084559807332085, "grad_norm": 37.5717887878418, "learning_rate": 2.915440192667915e-07, "loss": 0.578, "step": 5295 }, { "epoch": 0.7085897778967086, "grad_norm": 21.330400466918945, "learning_rate": 2.9141022210329143e-07, "loss": 0.3419, "step": 5296 }, { "epoch": 0.7087235750602088, "grad_norm": 35.79365539550781, "learning_rate": 2.9127642493979126e-07, "loss": 0.3174, "step": 5297 }, { "epoch": 0.7088573722237088, "grad_norm": 23.186065673828125, "learning_rate": 2.9114262777629114e-07, "loss": 0.4366, "step": 5298 }, { "epoch": 0.708991169387209, "grad_norm": 14.570172309875488, "learning_rate": 2.91008830612791e-07, "loss": 0.1557, "step": 5299 }, { "epoch": 0.7091249665507091, "grad_norm": 29.83888816833496, "learning_rate": 2.908750334492909e-07, "loss": 0.3025, "step": 5300 }, { "epoch": 0.7092587637142093, "grad_norm": 37.8806037902832, "learning_rate": 2.907412362857907e-07, "loss": 0.3705, "step": 5301 }, { "epoch": 0.7093925608777094, "grad_norm": 57.07444763183594, "learning_rate": 2.906074391222906e-07, "loss": 0.4784, "step": 5302 }, { "epoch": 0.7095263580412096, "grad_norm": 33.756317138671875, "learning_rate": 2.904736419587905e-07, "loss": 0.3247, "step": 5303 }, { "epoch": 0.7096601552047097, "grad_norm": 36.31431579589844, "learning_rate": 2.9033984479529036e-07, "loss": 0.3543, "step": 5304 }, { "epoch": 0.7097939523682097, "grad_norm": 56.479393005371094, "learning_rate": 2.902060476317902e-07, "loss": 0.6214, "step": 5305 }, { "epoch": 0.7099277495317099, "grad_norm": 22.732229232788086, "learning_rate": 2.9007225046829e-07, "loss": 0.3307, "step": 5306 }, { "epoch": 0.71006154669521, "grad_norm": 36.485843658447266, "learning_rate": 2.8993845330478994e-07, "loss": 0.3945, "step": 5307 }, { "epoch": 0.7101953438587102, "grad_norm": 35.07688903808594, "learning_rate": 2.8980465614128977e-07, "loss": 0.4408, "step": 5308 }, { "epoch": 0.7103291410222103, "grad_norm": 31.512948989868164, "learning_rate": 2.8967085897778965e-07, "loss": 0.4182, "step": 5309 }, { "epoch": 0.7104629381857105, "grad_norm": 28.657316207885742, "learning_rate": 2.8953706181428953e-07, "loss": 0.2731, "step": 5310 }, { "epoch": 0.7105967353492106, "grad_norm": 36.69027328491211, "learning_rate": 2.894032646507894e-07, "loss": 0.5716, "step": 5311 }, { "epoch": 0.7107305325127107, "grad_norm": 42.47652816772461, "learning_rate": 2.8926946748728923e-07, "loss": 0.7045, "step": 5312 }, { "epoch": 0.7108643296762108, "grad_norm": 59.03776931762695, "learning_rate": 2.891356703237891e-07, "loss": 0.5877, "step": 5313 }, { "epoch": 0.710998126839711, "grad_norm": 46.88302993774414, "learning_rate": 2.89001873160289e-07, "loss": 0.4831, "step": 5314 }, { "epoch": 0.7111319240032111, "grad_norm": 38.888309478759766, "learning_rate": 2.8886807599678887e-07, "loss": 0.3398, "step": 5315 }, { "epoch": 0.7112657211667113, "grad_norm": 42.17565155029297, "learning_rate": 2.887342788332887e-07, "loss": 0.4897, "step": 5316 }, { "epoch": 0.7113995183302114, "grad_norm": 25.703201293945312, "learning_rate": 2.886004816697886e-07, "loss": 0.6068, "step": 5317 }, { "epoch": 0.7115333154937116, "grad_norm": 39.89419937133789, "learning_rate": 2.8846668450628846e-07, "loss": 0.3448, "step": 5318 }, { "epoch": 0.7116671126572117, "grad_norm": 31.105775833129883, "learning_rate": 2.8833288734278834e-07, "loss": 0.2864, "step": 5319 }, { "epoch": 0.7118009098207118, "grad_norm": 34.6909294128418, "learning_rate": 2.8819909017928816e-07, "loss": 0.2809, "step": 5320 }, { "epoch": 0.7119347069842119, "grad_norm": 39.07752227783203, "learning_rate": 2.8806529301578804e-07, "loss": 0.3539, "step": 5321 }, { "epoch": 0.7120685041477121, "grad_norm": 26.336790084838867, "learning_rate": 2.879314958522879e-07, "loss": 0.3272, "step": 5322 }, { "epoch": 0.7122023013112122, "grad_norm": 28.676708221435547, "learning_rate": 2.877976986887878e-07, "loss": 0.3729, "step": 5323 }, { "epoch": 0.7123360984747124, "grad_norm": 30.24585723876953, "learning_rate": 2.8766390152528763e-07, "loss": 0.5045, "step": 5324 }, { "epoch": 0.7124698956382125, "grad_norm": 19.817291259765625, "learning_rate": 2.8753010436178756e-07, "loss": 0.2722, "step": 5325 }, { "epoch": 0.7126036928017127, "grad_norm": 28.81949234008789, "learning_rate": 2.873963071982874e-07, "loss": 0.375, "step": 5326 }, { "epoch": 0.7127374899652127, "grad_norm": 31.10994529724121, "learning_rate": 2.8726251003478726e-07, "loss": 0.362, "step": 5327 }, { "epoch": 0.7128712871287128, "grad_norm": 46.372493743896484, "learning_rate": 2.871287128712871e-07, "loss": 0.5421, "step": 5328 }, { "epoch": 0.713005084292213, "grad_norm": 67.59211730957031, "learning_rate": 2.86994915707787e-07, "loss": 0.7012, "step": 5329 }, { "epoch": 0.7131388814557131, "grad_norm": 27.04210090637207, "learning_rate": 2.8686111854428685e-07, "loss": 0.4213, "step": 5330 }, { "epoch": 0.7132726786192133, "grad_norm": 29.850854873657227, "learning_rate": 2.867273213807867e-07, "loss": 0.3888, "step": 5331 }, { "epoch": 0.7134064757827134, "grad_norm": 28.246191024780273, "learning_rate": 2.8659352421728656e-07, "loss": 0.2611, "step": 5332 }, { "epoch": 0.7135402729462136, "grad_norm": 35.00990295410156, "learning_rate": 2.8645972705378643e-07, "loss": 0.5179, "step": 5333 }, { "epoch": 0.7136740701097136, "grad_norm": 22.97087860107422, "learning_rate": 2.863259298902863e-07, "loss": 0.2318, "step": 5334 }, { "epoch": 0.7138078672732138, "grad_norm": 29.718286514282227, "learning_rate": 2.8619213272678614e-07, "loss": 0.3577, "step": 5335 }, { "epoch": 0.7139416644367139, "grad_norm": 40.13438415527344, "learning_rate": 2.8605833556328607e-07, "loss": 0.5203, "step": 5336 }, { "epoch": 0.7140754616002141, "grad_norm": 44.27842330932617, "learning_rate": 2.859245383997859e-07, "loss": 0.526, "step": 5337 }, { "epoch": 0.7142092587637142, "grad_norm": 17.396867752075195, "learning_rate": 2.857907412362858e-07, "loss": 0.1713, "step": 5338 }, { "epoch": 0.7143430559272144, "grad_norm": 37.677982330322266, "learning_rate": 2.856569440727856e-07, "loss": 0.5522, "step": 5339 }, { "epoch": 0.7144768530907145, "grad_norm": 40.46981430053711, "learning_rate": 2.8552314690928554e-07, "loss": 0.3325, "step": 5340 }, { "epoch": 0.7146106502542147, "grad_norm": 45.3526611328125, "learning_rate": 2.8538934974578536e-07, "loss": 0.5785, "step": 5341 }, { "epoch": 0.7147444474177147, "grad_norm": 54.61156463623047, "learning_rate": 2.8525555258228524e-07, "loss": 0.6149, "step": 5342 }, { "epoch": 0.7148782445812149, "grad_norm": 29.849658966064453, "learning_rate": 2.8512175541878507e-07, "loss": 0.246, "step": 5343 }, { "epoch": 0.715012041744715, "grad_norm": 25.590328216552734, "learning_rate": 2.84987958255285e-07, "loss": 0.2999, "step": 5344 }, { "epoch": 0.7151458389082151, "grad_norm": 47.11568069458008, "learning_rate": 2.8485416109178483e-07, "loss": 0.7382, "step": 5345 }, { "epoch": 0.7152796360717153, "grad_norm": 30.034547805786133, "learning_rate": 2.847203639282847e-07, "loss": 0.518, "step": 5346 }, { "epoch": 0.7154134332352154, "grad_norm": 27.044408798217773, "learning_rate": 2.845865667647846e-07, "loss": 0.2267, "step": 5347 }, { "epoch": 0.7155472303987156, "grad_norm": 37.41008377075195, "learning_rate": 2.8445276960128446e-07, "loss": 0.4823, "step": 5348 }, { "epoch": 0.7156810275622156, "grad_norm": 40.61807632446289, "learning_rate": 2.843189724377843e-07, "loss": 0.4519, "step": 5349 }, { "epoch": 0.7158148247257158, "grad_norm": 32.76748275756836, "learning_rate": 2.8418517527428417e-07, "loss": 0.3631, "step": 5350 }, { "epoch": 0.7159486218892159, "grad_norm": 29.903636932373047, "learning_rate": 2.8405137811078405e-07, "loss": 0.2383, "step": 5351 }, { "epoch": 0.7160824190527161, "grad_norm": 37.76803207397461, "learning_rate": 2.8391758094728393e-07, "loss": 0.5191, "step": 5352 }, { "epoch": 0.7162162162162162, "grad_norm": 30.92650032043457, "learning_rate": 2.8378378378378376e-07, "loss": 0.5017, "step": 5353 }, { "epoch": 0.7163500133797164, "grad_norm": 46.46158981323242, "learning_rate": 2.8364998662028363e-07, "loss": 0.3309, "step": 5354 }, { "epoch": 0.7164838105432165, "grad_norm": 28.40509796142578, "learning_rate": 2.835161894567835e-07, "loss": 0.4692, "step": 5355 }, { "epoch": 0.7166176077067166, "grad_norm": 51.837223052978516, "learning_rate": 2.833823922932834e-07, "loss": 0.5864, "step": 5356 }, { "epoch": 0.7167514048702167, "grad_norm": 19.380001068115234, "learning_rate": 2.832485951297832e-07, "loss": 0.1882, "step": 5357 }, { "epoch": 0.7168852020337169, "grad_norm": 27.673582077026367, "learning_rate": 2.8311479796628315e-07, "loss": 0.3748, "step": 5358 }, { "epoch": 0.717018999197217, "grad_norm": 35.98404312133789, "learning_rate": 2.82981000802783e-07, "loss": 0.7713, "step": 5359 }, { "epoch": 0.7171527963607172, "grad_norm": 41.26713562011719, "learning_rate": 2.828472036392828e-07, "loss": 0.5042, "step": 5360 }, { "epoch": 0.7172865935242173, "grad_norm": 17.09523582458496, "learning_rate": 2.827134064757827e-07, "loss": 0.2085, "step": 5361 }, { "epoch": 0.7174203906877175, "grad_norm": 28.812458038330078, "learning_rate": 2.8257960931228256e-07, "loss": 0.4809, "step": 5362 }, { "epoch": 0.7175541878512176, "grad_norm": 37.599464416503906, "learning_rate": 2.8244581214878244e-07, "loss": 0.3768, "step": 5363 }, { "epoch": 0.7176879850147176, "grad_norm": 22.009653091430664, "learning_rate": 2.8231201498528227e-07, "loss": 0.2662, "step": 5364 }, { "epoch": 0.7178217821782178, "grad_norm": 29.804452896118164, "learning_rate": 2.8217821782178215e-07, "loss": 0.3903, "step": 5365 }, { "epoch": 0.7179555793417179, "grad_norm": 39.51920700073242, "learning_rate": 2.8204442065828203e-07, "loss": 0.2578, "step": 5366 }, { "epoch": 0.7180893765052181, "grad_norm": 35.0211181640625, "learning_rate": 2.819106234947819e-07, "loss": 0.4706, "step": 5367 }, { "epoch": 0.7182231736687182, "grad_norm": 39.44925308227539, "learning_rate": 2.8177682633128173e-07, "loss": 0.3881, "step": 5368 }, { "epoch": 0.7183569708322184, "grad_norm": 22.95878791809082, "learning_rate": 2.8164302916778166e-07, "loss": 0.2634, "step": 5369 }, { "epoch": 0.7184907679957185, "grad_norm": 33.76576232910156, "learning_rate": 2.815092320042815e-07, "loss": 0.5001, "step": 5370 }, { "epoch": 0.7186245651592186, "grad_norm": 40.06336975097656, "learning_rate": 2.8137543484078137e-07, "loss": 0.4009, "step": 5371 }, { "epoch": 0.7187583623227187, "grad_norm": 37.35343551635742, "learning_rate": 2.812416376772812e-07, "loss": 0.3435, "step": 5372 }, { "epoch": 0.7188921594862189, "grad_norm": 33.11797332763672, "learning_rate": 2.8110784051378113e-07, "loss": 0.2441, "step": 5373 }, { "epoch": 0.719025956649719, "grad_norm": 35.66158676147461, "learning_rate": 2.8097404335028096e-07, "loss": 0.3748, "step": 5374 }, { "epoch": 0.7191597538132192, "grad_norm": 32.515254974365234, "learning_rate": 2.8084024618678083e-07, "loss": 0.429, "step": 5375 }, { "epoch": 0.7192935509767193, "grad_norm": 32.49070358276367, "learning_rate": 2.8070644902328066e-07, "loss": 0.3902, "step": 5376 }, { "epoch": 0.7194273481402195, "grad_norm": 36.64565658569336, "learning_rate": 2.805726518597806e-07, "loss": 0.3856, "step": 5377 }, { "epoch": 0.7195611453037195, "grad_norm": 31.653640747070312, "learning_rate": 2.804388546962804e-07, "loss": 0.3087, "step": 5378 }, { "epoch": 0.7196949424672197, "grad_norm": 22.319080352783203, "learning_rate": 2.803050575327803e-07, "loss": 0.2776, "step": 5379 }, { "epoch": 0.7198287396307198, "grad_norm": 22.367109298706055, "learning_rate": 2.801712603692802e-07, "loss": 0.2704, "step": 5380 }, { "epoch": 0.71996253679422, "grad_norm": 32.633426666259766, "learning_rate": 2.8003746320578006e-07, "loss": 0.3375, "step": 5381 }, { "epoch": 0.7200963339577201, "grad_norm": 21.533292770385742, "learning_rate": 2.799036660422799e-07, "loss": 0.3173, "step": 5382 }, { "epoch": 0.7202301311212203, "grad_norm": 50.17787551879883, "learning_rate": 2.797698688787797e-07, "loss": 0.7452, "step": 5383 }, { "epoch": 0.7203639282847204, "grad_norm": 38.902626037597656, "learning_rate": 2.7963607171527964e-07, "loss": 0.3893, "step": 5384 }, { "epoch": 0.7204977254482205, "grad_norm": 37.74739074707031, "learning_rate": 2.7950227455177947e-07, "loss": 0.3759, "step": 5385 }, { "epoch": 0.7206315226117206, "grad_norm": 26.414867401123047, "learning_rate": 2.7936847738827935e-07, "loss": 0.2697, "step": 5386 }, { "epoch": 0.7207653197752207, "grad_norm": 22.219669342041016, "learning_rate": 2.792346802247792e-07, "loss": 0.3247, "step": 5387 }, { "epoch": 0.7208991169387209, "grad_norm": 28.792705535888672, "learning_rate": 2.791008830612791e-07, "loss": 0.3198, "step": 5388 }, { "epoch": 0.721032914102221, "grad_norm": 25.756444931030273, "learning_rate": 2.7896708589777893e-07, "loss": 0.1898, "step": 5389 }, { "epoch": 0.7211667112657212, "grad_norm": 25.407602310180664, "learning_rate": 2.788332887342788e-07, "loss": 0.3274, "step": 5390 }, { "epoch": 0.7213005084292213, "grad_norm": 35.70065689086914, "learning_rate": 2.786994915707787e-07, "loss": 0.3787, "step": 5391 }, { "epoch": 0.7214343055927215, "grad_norm": 36.58345031738281, "learning_rate": 2.7856569440727857e-07, "loss": 0.2177, "step": 5392 }, { "epoch": 0.7215681027562215, "grad_norm": 27.350452423095703, "learning_rate": 2.784318972437784e-07, "loss": 0.3373, "step": 5393 }, { "epoch": 0.7217018999197217, "grad_norm": 29.10093879699707, "learning_rate": 2.782981000802783e-07, "loss": 0.3459, "step": 5394 }, { "epoch": 0.7218356970832218, "grad_norm": 22.451650619506836, "learning_rate": 2.7816430291677816e-07, "loss": 0.17, "step": 5395 }, { "epoch": 0.721969494246722, "grad_norm": 46.76890563964844, "learning_rate": 2.7803050575327803e-07, "loss": 0.5464, "step": 5396 }, { "epoch": 0.7221032914102221, "grad_norm": 55.712806701660156, "learning_rate": 2.7789670858977786e-07, "loss": 0.4552, "step": 5397 }, { "epoch": 0.7222370885737223, "grad_norm": 22.606422424316406, "learning_rate": 2.7776291142627774e-07, "loss": 0.3727, "step": 5398 }, { "epoch": 0.7223708857372224, "grad_norm": 40.62294006347656, "learning_rate": 2.776291142627776e-07, "loss": 0.4307, "step": 5399 }, { "epoch": 0.7225046829007225, "grad_norm": 35.84662628173828, "learning_rate": 2.774953170992775e-07, "loss": 0.3033, "step": 5400 }, { "epoch": 0.7226384800642226, "grad_norm": 25.899188995361328, "learning_rate": 2.773615199357773e-07, "loss": 0.3844, "step": 5401 }, { "epoch": 0.7227722772277227, "grad_norm": 51.28428649902344, "learning_rate": 2.7722772277227726e-07, "loss": 0.4795, "step": 5402 }, { "epoch": 0.7229060743912229, "grad_norm": 46.41463088989258, "learning_rate": 2.770939256087771e-07, "loss": 0.4298, "step": 5403 }, { "epoch": 0.723039871554723, "grad_norm": 41.224525451660156, "learning_rate": 2.7696012844527696e-07, "loss": 0.468, "step": 5404 }, { "epoch": 0.7231736687182232, "grad_norm": 25.675745010375977, "learning_rate": 2.768263312817768e-07, "loss": 0.3249, "step": 5405 }, { "epoch": 0.7233074658817233, "grad_norm": 46.70011520385742, "learning_rate": 2.766925341182767e-07, "loss": 0.6649, "step": 5406 }, { "epoch": 0.7234412630452235, "grad_norm": 47.18900680541992, "learning_rate": 2.7655873695477655e-07, "loss": 0.4672, "step": 5407 }, { "epoch": 0.7235750602087235, "grad_norm": 21.35694694519043, "learning_rate": 2.7642493979127643e-07, "loss": 0.2449, "step": 5408 }, { "epoch": 0.7237088573722237, "grad_norm": 35.839752197265625, "learning_rate": 2.7629114262777625e-07, "loss": 0.4623, "step": 5409 }, { "epoch": 0.7238426545357238, "grad_norm": 20.127225875854492, "learning_rate": 2.761573454642762e-07, "loss": 0.233, "step": 5410 }, { "epoch": 0.723976451699224, "grad_norm": 30.024045944213867, "learning_rate": 2.76023548300776e-07, "loss": 0.1939, "step": 5411 }, { "epoch": 0.7241102488627241, "grad_norm": 28.08783531188965, "learning_rate": 2.7588975113727584e-07, "loss": 0.3932, "step": 5412 }, { "epoch": 0.7242440460262243, "grad_norm": 49.61100769042969, "learning_rate": 2.7575595397377577e-07, "loss": 0.4387, "step": 5413 }, { "epoch": 0.7243778431897244, "grad_norm": 32.63918685913086, "learning_rate": 2.756221568102756e-07, "loss": 0.3844, "step": 5414 }, { "epoch": 0.7245116403532245, "grad_norm": 31.777599334716797, "learning_rate": 2.754883596467755e-07, "loss": 0.2732, "step": 5415 }, { "epoch": 0.7246454375167246, "grad_norm": 32.81201934814453, "learning_rate": 2.753545624832753e-07, "loss": 0.4053, "step": 5416 }, { "epoch": 0.7247792346802248, "grad_norm": 22.728092193603516, "learning_rate": 2.7522076531977523e-07, "loss": 0.3416, "step": 5417 }, { "epoch": 0.7249130318437249, "grad_norm": 46.99352264404297, "learning_rate": 2.7508696815627506e-07, "loss": 0.5073, "step": 5418 }, { "epoch": 0.7250468290072251, "grad_norm": 35.43782043457031, "learning_rate": 2.7495317099277494e-07, "loss": 0.4977, "step": 5419 }, { "epoch": 0.7251806261707252, "grad_norm": 37.11955261230469, "learning_rate": 2.7481937382927477e-07, "loss": 0.2516, "step": 5420 }, { "epoch": 0.7253144233342254, "grad_norm": 24.253599166870117, "learning_rate": 2.746855766657747e-07, "loss": 0.282, "step": 5421 }, { "epoch": 0.7254482204977254, "grad_norm": 34.27128219604492, "learning_rate": 2.745517795022745e-07, "loss": 0.6081, "step": 5422 }, { "epoch": 0.7255820176612255, "grad_norm": 37.47230911254883, "learning_rate": 2.744179823387744e-07, "loss": 0.3952, "step": 5423 }, { "epoch": 0.7257158148247257, "grad_norm": 34.18154525756836, "learning_rate": 2.742841851752743e-07, "loss": 0.4772, "step": 5424 }, { "epoch": 0.7258496119882258, "grad_norm": 31.062604904174805, "learning_rate": 2.7415038801177416e-07, "loss": 0.3521, "step": 5425 }, { "epoch": 0.725983409151726, "grad_norm": 35.97780990600586, "learning_rate": 2.74016590848274e-07, "loss": 0.3254, "step": 5426 }, { "epoch": 0.7261172063152261, "grad_norm": 36.44972229003906, "learning_rate": 2.7388279368477387e-07, "loss": 0.4159, "step": 5427 }, { "epoch": 0.7262510034787263, "grad_norm": 34.944305419921875, "learning_rate": 2.7374899652127375e-07, "loss": 0.5679, "step": 5428 }, { "epoch": 0.7263848006422264, "grad_norm": 32.91777801513672, "learning_rate": 2.7361519935777363e-07, "loss": 0.4642, "step": 5429 }, { "epoch": 0.7265185978057265, "grad_norm": 39.808570861816406, "learning_rate": 2.7348140219427345e-07, "loss": 0.6263, "step": 5430 }, { "epoch": 0.7266523949692266, "grad_norm": 34.5972785949707, "learning_rate": 2.7334760503077333e-07, "loss": 0.4585, "step": 5431 }, { "epoch": 0.7267861921327268, "grad_norm": 27.364606857299805, "learning_rate": 2.732138078672732e-07, "loss": 0.3508, "step": 5432 }, { "epoch": 0.7269199892962269, "grad_norm": 32.29902267456055, "learning_rate": 2.730800107037731e-07, "loss": 0.3872, "step": 5433 }, { "epoch": 0.7270537864597271, "grad_norm": 38.384395599365234, "learning_rate": 2.729462135402729e-07, "loss": 0.481, "step": 5434 }, { "epoch": 0.7271875836232272, "grad_norm": 28.16619300842285, "learning_rate": 2.7281241637677285e-07, "loss": 0.3891, "step": 5435 }, { "epoch": 0.7273213807867274, "grad_norm": 28.49008560180664, "learning_rate": 2.726786192132727e-07, "loss": 0.3375, "step": 5436 }, { "epoch": 0.7274551779502274, "grad_norm": 21.593191146850586, "learning_rate": 2.725448220497725e-07, "loss": 0.2775, "step": 5437 }, { "epoch": 0.7275889751137276, "grad_norm": 25.0470027923584, "learning_rate": 2.724110248862724e-07, "loss": 0.336, "step": 5438 }, { "epoch": 0.7277227722772277, "grad_norm": 26.127670288085938, "learning_rate": 2.7227722772277226e-07, "loss": 0.3247, "step": 5439 }, { "epoch": 0.7278565694407279, "grad_norm": 25.690937042236328, "learning_rate": 2.7214343055927214e-07, "loss": 0.3376, "step": 5440 }, { "epoch": 0.727990366604228, "grad_norm": 37.72236251831055, "learning_rate": 2.7200963339577197e-07, "loss": 0.477, "step": 5441 }, { "epoch": 0.7281241637677281, "grad_norm": 31.812759399414062, "learning_rate": 2.7187583623227185e-07, "loss": 0.4646, "step": 5442 }, { "epoch": 0.7282579609312283, "grad_norm": 21.715187072753906, "learning_rate": 2.717420390687717e-07, "loss": 0.2943, "step": 5443 }, { "epoch": 0.7283917580947283, "grad_norm": 33.835662841796875, "learning_rate": 2.716082419052716e-07, "loss": 0.3308, "step": 5444 }, { "epoch": 0.7285255552582285, "grad_norm": 46.09088897705078, "learning_rate": 2.7147444474177143e-07, "loss": 0.5735, "step": 5445 }, { "epoch": 0.7286593524217286, "grad_norm": 21.360214233398438, "learning_rate": 2.7134064757827136e-07, "loss": 0.2826, "step": 5446 }, { "epoch": 0.7287931495852288, "grad_norm": 28.96116065979004, "learning_rate": 2.712068504147712e-07, "loss": 0.4041, "step": 5447 }, { "epoch": 0.7289269467487289, "grad_norm": 33.11277389526367, "learning_rate": 2.7107305325127107e-07, "loss": 0.2727, "step": 5448 }, { "epoch": 0.7290607439122291, "grad_norm": 47.39077377319336, "learning_rate": 2.709392560877709e-07, "loss": 0.5351, "step": 5449 }, { "epoch": 0.7291945410757292, "grad_norm": 28.579607009887695, "learning_rate": 2.7080545892427083e-07, "loss": 0.3217, "step": 5450 }, { "epoch": 0.7293283382392294, "grad_norm": 23.323511123657227, "learning_rate": 2.7067166176077065e-07, "loss": 0.4277, "step": 5451 }, { "epoch": 0.7294621354027294, "grad_norm": 31.95388412475586, "learning_rate": 2.7053786459727053e-07, "loss": 0.3597, "step": 5452 }, { "epoch": 0.7295959325662296, "grad_norm": 51.92453384399414, "learning_rate": 2.7040406743377036e-07, "loss": 0.4253, "step": 5453 }, { "epoch": 0.7297297297297297, "grad_norm": 19.948631286621094, "learning_rate": 2.702702702702703e-07, "loss": 0.3031, "step": 5454 }, { "epoch": 0.7298635268932299, "grad_norm": 48.09726333618164, "learning_rate": 2.701364731067701e-07, "loss": 0.3677, "step": 5455 }, { "epoch": 0.72999732405673, "grad_norm": 38.08056640625, "learning_rate": 2.7000267594327e-07, "loss": 0.5345, "step": 5456 }, { "epoch": 0.7301311212202302, "grad_norm": 34.22584533691406, "learning_rate": 2.698688787797699e-07, "loss": 0.3884, "step": 5457 }, { "epoch": 0.7302649183837303, "grad_norm": 57.93080139160156, "learning_rate": 2.6973508161626976e-07, "loss": 0.6806, "step": 5458 }, { "epoch": 0.7303987155472303, "grad_norm": 27.528955459594727, "learning_rate": 2.696012844527696e-07, "loss": 0.2142, "step": 5459 }, { "epoch": 0.7305325127107305, "grad_norm": 17.762680053710938, "learning_rate": 2.6946748728926946e-07, "loss": 0.3284, "step": 5460 }, { "epoch": 0.7306663098742306, "grad_norm": 16.2711124420166, "learning_rate": 2.6933369012576934e-07, "loss": 0.2164, "step": 5461 }, { "epoch": 0.7308001070377308, "grad_norm": 31.811233520507812, "learning_rate": 2.691998929622692e-07, "loss": 0.2969, "step": 5462 }, { "epoch": 0.7309339042012309, "grad_norm": 27.48539161682129, "learning_rate": 2.6906609579876905e-07, "loss": 0.282, "step": 5463 }, { "epoch": 0.7310677013647311, "grad_norm": 20.89492416381836, "learning_rate": 2.6893229863526887e-07, "loss": 0.1448, "step": 5464 }, { "epoch": 0.7312014985282312, "grad_norm": 35.48397445678711, "learning_rate": 2.687985014717688e-07, "loss": 0.4176, "step": 5465 }, { "epoch": 0.7313352956917314, "grad_norm": 22.01637077331543, "learning_rate": 2.6866470430826863e-07, "loss": 0.2195, "step": 5466 }, { "epoch": 0.7314690928552314, "grad_norm": 20.264724731445312, "learning_rate": 2.685309071447685e-07, "loss": 0.2274, "step": 5467 }, { "epoch": 0.7316028900187316, "grad_norm": 44.22323989868164, "learning_rate": 2.683971099812684e-07, "loss": 0.5888, "step": 5468 }, { "epoch": 0.7317366871822317, "grad_norm": 16.121042251586914, "learning_rate": 2.6826331281776827e-07, "loss": 0.2182, "step": 5469 }, { "epoch": 0.7318704843457319, "grad_norm": 39.86767578125, "learning_rate": 2.681295156542681e-07, "loss": 0.4161, "step": 5470 }, { "epoch": 0.732004281509232, "grad_norm": 59.320613861083984, "learning_rate": 2.67995718490768e-07, "loss": 0.483, "step": 5471 }, { "epoch": 0.7321380786727322, "grad_norm": 47.528846740722656, "learning_rate": 2.6786192132726785e-07, "loss": 0.5183, "step": 5472 }, { "epoch": 0.7322718758362323, "grad_norm": 31.987709045410156, "learning_rate": 2.6772812416376773e-07, "loss": 0.3538, "step": 5473 }, { "epoch": 0.7324056729997324, "grad_norm": 61.72765350341797, "learning_rate": 2.6759432700026756e-07, "loss": 0.6299, "step": 5474 }, { "epoch": 0.7325394701632325, "grad_norm": 58.94081115722656, "learning_rate": 2.6746052983676744e-07, "loss": 0.6374, "step": 5475 }, { "epoch": 0.7326732673267327, "grad_norm": 26.74253273010254, "learning_rate": 2.673267326732673e-07, "loss": 0.2426, "step": 5476 }, { "epoch": 0.7328070644902328, "grad_norm": 27.560184478759766, "learning_rate": 2.671929355097672e-07, "loss": 0.2633, "step": 5477 }, { "epoch": 0.732940861653733, "grad_norm": 44.316246032714844, "learning_rate": 2.67059138346267e-07, "loss": 0.4588, "step": 5478 }, { "epoch": 0.7330746588172331, "grad_norm": 30.116371154785156, "learning_rate": 2.6692534118276696e-07, "loss": 0.4173, "step": 5479 }, { "epoch": 0.7332084559807333, "grad_norm": 34.20211410522461, "learning_rate": 2.667915440192668e-07, "loss": 0.3862, "step": 5480 }, { "epoch": 0.7333422531442333, "grad_norm": 47.348175048828125, "learning_rate": 2.6665774685576666e-07, "loss": 0.7975, "step": 5481 }, { "epoch": 0.7334760503077334, "grad_norm": 41.725215911865234, "learning_rate": 2.665239496922665e-07, "loss": 0.5994, "step": 5482 }, { "epoch": 0.7336098474712336, "grad_norm": 54.24884033203125, "learning_rate": 2.663901525287664e-07, "loss": 0.4813, "step": 5483 }, { "epoch": 0.7337436446347337, "grad_norm": 41.99609375, "learning_rate": 2.6625635536526625e-07, "loss": 0.477, "step": 5484 }, { "epoch": 0.7338774417982339, "grad_norm": 43.501155853271484, "learning_rate": 2.661225582017661e-07, "loss": 0.3026, "step": 5485 }, { "epoch": 0.734011238961734, "grad_norm": 37.73198318481445, "learning_rate": 2.6598876103826595e-07, "loss": 0.4876, "step": 5486 }, { "epoch": 0.7341450361252342, "grad_norm": 24.541954040527344, "learning_rate": 2.658549638747659e-07, "loss": 0.3263, "step": 5487 }, { "epoch": 0.7342788332887343, "grad_norm": 64.1943588256836, "learning_rate": 2.657211667112657e-07, "loss": 0.4109, "step": 5488 }, { "epoch": 0.7344126304522344, "grad_norm": 27.05389404296875, "learning_rate": 2.6558736954776554e-07, "loss": 0.464, "step": 5489 }, { "epoch": 0.7345464276157345, "grad_norm": 50.53364562988281, "learning_rate": 2.6545357238426547e-07, "loss": 0.3372, "step": 5490 }, { "epoch": 0.7346802247792347, "grad_norm": 30.24986457824707, "learning_rate": 2.653197752207653e-07, "loss": 0.283, "step": 5491 }, { "epoch": 0.7348140219427348, "grad_norm": 35.1933708190918, "learning_rate": 2.651859780572652e-07, "loss": 0.572, "step": 5492 }, { "epoch": 0.734947819106235, "grad_norm": 40.07086181640625, "learning_rate": 2.65052180893765e-07, "loss": 0.494, "step": 5493 }, { "epoch": 0.7350816162697351, "grad_norm": 27.843852996826172, "learning_rate": 2.6491838373026493e-07, "loss": 0.3002, "step": 5494 }, { "epoch": 0.7352154134332353, "grad_norm": 44.69026565551758, "learning_rate": 2.6478458656676476e-07, "loss": 0.4557, "step": 5495 }, { "epoch": 0.7353492105967353, "grad_norm": 28.92470359802246, "learning_rate": 2.6465078940326464e-07, "loss": 0.2254, "step": 5496 }, { "epoch": 0.7354830077602355, "grad_norm": 26.838504791259766, "learning_rate": 2.6451699223976446e-07, "loss": 0.3239, "step": 5497 }, { "epoch": 0.7356168049237356, "grad_norm": 29.20306396484375, "learning_rate": 2.643831950762644e-07, "loss": 0.4025, "step": 5498 }, { "epoch": 0.7357506020872357, "grad_norm": 22.311368942260742, "learning_rate": 2.642493979127642e-07, "loss": 0.3445, "step": 5499 }, { "epoch": 0.7358843992507359, "grad_norm": 56.834930419921875, "learning_rate": 2.641156007492641e-07, "loss": 0.73, "step": 5500 }, { "epoch": 0.736018196414236, "grad_norm": 42.610103607177734, "learning_rate": 2.63981803585764e-07, "loss": 0.47, "step": 5501 }, { "epoch": 0.7361519935777362, "grad_norm": 26.337032318115234, "learning_rate": 2.6384800642226386e-07, "loss": 0.3497, "step": 5502 }, { "epoch": 0.7362857907412362, "grad_norm": 26.4571533203125, "learning_rate": 2.637142092587637e-07, "loss": 0.4046, "step": 5503 }, { "epoch": 0.7364195879047364, "grad_norm": 29.79010581970215, "learning_rate": 2.6358041209526357e-07, "loss": 0.5082, "step": 5504 }, { "epoch": 0.7365533850682365, "grad_norm": 39.02006530761719, "learning_rate": 2.6344661493176345e-07, "loss": 0.4071, "step": 5505 }, { "epoch": 0.7366871822317367, "grad_norm": 43.35171127319336, "learning_rate": 2.633128177682633e-07, "loss": 0.6233, "step": 5506 }, { "epoch": 0.7368209793952368, "grad_norm": 30.218332290649414, "learning_rate": 2.6317902060476315e-07, "loss": 0.2194, "step": 5507 }, { "epoch": 0.736954776558737, "grad_norm": 26.77632713317871, "learning_rate": 2.6304522344126303e-07, "loss": 0.3844, "step": 5508 }, { "epoch": 0.7370885737222371, "grad_norm": 27.0655460357666, "learning_rate": 2.629114262777629e-07, "loss": 0.2518, "step": 5509 }, { "epoch": 0.7372223708857373, "grad_norm": 32.981292724609375, "learning_rate": 2.627776291142628e-07, "loss": 0.5074, "step": 5510 }, { "epoch": 0.7373561680492373, "grad_norm": 34.182437896728516, "learning_rate": 2.626438319507626e-07, "loss": 0.4369, "step": 5511 }, { "epoch": 0.7374899652127375, "grad_norm": 43.8848991394043, "learning_rate": 2.6251003478726255e-07, "loss": 0.6042, "step": 5512 }, { "epoch": 0.7376237623762376, "grad_norm": 37.96943664550781, "learning_rate": 2.623762376237624e-07, "loss": 0.3761, "step": 5513 }, { "epoch": 0.7377575595397378, "grad_norm": 40.39948272705078, "learning_rate": 2.622424404602622e-07, "loss": 0.5313, "step": 5514 }, { "epoch": 0.7378913567032379, "grad_norm": 30.896686553955078, "learning_rate": 2.621086432967621e-07, "loss": 0.3364, "step": 5515 }, { "epoch": 0.7380251538667381, "grad_norm": 25.038881301879883, "learning_rate": 2.6197484613326196e-07, "loss": 0.2801, "step": 5516 }, { "epoch": 0.7381589510302382, "grad_norm": 44.38338088989258, "learning_rate": 2.6184104896976184e-07, "loss": 0.5249, "step": 5517 }, { "epoch": 0.7382927481937382, "grad_norm": 28.07524871826172, "learning_rate": 2.6170725180626166e-07, "loss": 0.2283, "step": 5518 }, { "epoch": 0.7384265453572384, "grad_norm": 39.57826232910156, "learning_rate": 2.6157345464276154e-07, "loss": 0.5791, "step": 5519 }, { "epoch": 0.7385603425207385, "grad_norm": 50.99422073364258, "learning_rate": 2.614396574792614e-07, "loss": 0.3024, "step": 5520 }, { "epoch": 0.7386941396842387, "grad_norm": 22.40597152709961, "learning_rate": 2.613058603157613e-07, "loss": 0.3352, "step": 5521 }, { "epoch": 0.7388279368477388, "grad_norm": 46.869266510009766, "learning_rate": 2.6117206315226113e-07, "loss": 0.5121, "step": 5522 }, { "epoch": 0.738961734011239, "grad_norm": 29.67239761352539, "learning_rate": 2.6103826598876106e-07, "loss": 0.3397, "step": 5523 }, { "epoch": 0.7390955311747391, "grad_norm": 18.840442657470703, "learning_rate": 2.609044688252609e-07, "loss": 0.2457, "step": 5524 }, { "epoch": 0.7392293283382392, "grad_norm": 34.682098388671875, "learning_rate": 2.6077067166176077e-07, "loss": 0.4077, "step": 5525 }, { "epoch": 0.7393631255017393, "grad_norm": 32.55626678466797, "learning_rate": 2.606368744982606e-07, "loss": 0.369, "step": 5526 }, { "epoch": 0.7394969226652395, "grad_norm": 32.691898345947266, "learning_rate": 2.605030773347605e-07, "loss": 0.4326, "step": 5527 }, { "epoch": 0.7396307198287396, "grad_norm": 29.292709350585938, "learning_rate": 2.6036928017126035e-07, "loss": 0.4591, "step": 5528 }, { "epoch": 0.7397645169922398, "grad_norm": 22.779369354248047, "learning_rate": 2.6023548300776023e-07, "loss": 0.1747, "step": 5529 }, { "epoch": 0.7398983141557399, "grad_norm": 36.1392707824707, "learning_rate": 2.6010168584426006e-07, "loss": 0.4425, "step": 5530 }, { "epoch": 0.7400321113192401, "grad_norm": 27.909175872802734, "learning_rate": 2.5996788868076e-07, "loss": 0.3959, "step": 5531 }, { "epoch": 0.7401659084827402, "grad_norm": 26.224889755249023, "learning_rate": 2.598340915172598e-07, "loss": 0.339, "step": 5532 }, { "epoch": 0.7402997056462403, "grad_norm": 22.48994255065918, "learning_rate": 2.597002943537597e-07, "loss": 0.2675, "step": 5533 }, { "epoch": 0.7404335028097404, "grad_norm": 34.10752868652344, "learning_rate": 2.595664971902596e-07, "loss": 0.4962, "step": 5534 }, { "epoch": 0.7405672999732406, "grad_norm": 26.23805046081543, "learning_rate": 2.5943270002675945e-07, "loss": 0.2913, "step": 5535 }, { "epoch": 0.7407010971367407, "grad_norm": 28.33621597290039, "learning_rate": 2.592989028632593e-07, "loss": 0.2814, "step": 5536 }, { "epoch": 0.7408348943002409, "grad_norm": 35.614295959472656, "learning_rate": 2.5916510569975916e-07, "loss": 0.5024, "step": 5537 }, { "epoch": 0.740968691463741, "grad_norm": 27.152982711791992, "learning_rate": 2.5903130853625904e-07, "loss": 0.4408, "step": 5538 }, { "epoch": 0.7411024886272412, "grad_norm": 26.62236785888672, "learning_rate": 2.588975113727589e-07, "loss": 0.3657, "step": 5539 }, { "epoch": 0.7412362857907412, "grad_norm": 44.01092529296875, "learning_rate": 2.5876371420925874e-07, "loss": 0.3448, "step": 5540 }, { "epoch": 0.7413700829542413, "grad_norm": 49.05390167236328, "learning_rate": 2.5862991704575857e-07, "loss": 0.6039, "step": 5541 }, { "epoch": 0.7415038801177415, "grad_norm": 36.05138397216797, "learning_rate": 2.584961198822585e-07, "loss": 0.2562, "step": 5542 }, { "epoch": 0.7416376772812416, "grad_norm": 56.60533142089844, "learning_rate": 2.5836232271875833e-07, "loss": 0.538, "step": 5543 }, { "epoch": 0.7417714744447418, "grad_norm": 19.52121925354004, "learning_rate": 2.582285255552582e-07, "loss": 0.2932, "step": 5544 }, { "epoch": 0.7419052716082419, "grad_norm": 38.47966384887695, "learning_rate": 2.580947283917581e-07, "loss": 0.504, "step": 5545 }, { "epoch": 0.7420390687717421, "grad_norm": 22.02315330505371, "learning_rate": 2.5796093122825797e-07, "loss": 0.236, "step": 5546 }, { "epoch": 0.7421728659352421, "grad_norm": 35.11128616333008, "learning_rate": 2.578271340647578e-07, "loss": 0.3496, "step": 5547 }, { "epoch": 0.7423066630987423, "grad_norm": 30.633907318115234, "learning_rate": 2.5769333690125767e-07, "loss": 0.5222, "step": 5548 }, { "epoch": 0.7424404602622424, "grad_norm": 31.9858341217041, "learning_rate": 2.5755953973775755e-07, "loss": 0.2898, "step": 5549 }, { "epoch": 0.7425742574257426, "grad_norm": 32.4011344909668, "learning_rate": 2.5742574257425743e-07, "loss": 0.4079, "step": 5550 }, { "epoch": 0.7427080545892427, "grad_norm": 27.398611068725586, "learning_rate": 2.5729194541075726e-07, "loss": 0.3689, "step": 5551 }, { "epoch": 0.7428418517527429, "grad_norm": 22.386798858642578, "learning_rate": 2.5715814824725714e-07, "loss": 0.2581, "step": 5552 }, { "epoch": 0.742975648916243, "grad_norm": 40.39767837524414, "learning_rate": 2.57024351083757e-07, "loss": 0.4604, "step": 5553 }, { "epoch": 0.7431094460797432, "grad_norm": 28.134862899780273, "learning_rate": 2.568905539202569e-07, "loss": 0.4257, "step": 5554 }, { "epoch": 0.7432432432432432, "grad_norm": 45.57358932495117, "learning_rate": 2.567567567567567e-07, "loss": 0.3647, "step": 5555 }, { "epoch": 0.7433770404067434, "grad_norm": 42.89253616333008, "learning_rate": 2.5662295959325665e-07, "loss": 0.4925, "step": 5556 }, { "epoch": 0.7435108375702435, "grad_norm": 21.747516632080078, "learning_rate": 2.564891624297565e-07, "loss": 0.368, "step": 5557 }, { "epoch": 0.7436446347337436, "grad_norm": 39.522491455078125, "learning_rate": 2.5635536526625636e-07, "loss": 0.5665, "step": 5558 }, { "epoch": 0.7437784318972438, "grad_norm": 28.857526779174805, "learning_rate": 2.562215681027562e-07, "loss": 0.296, "step": 5559 }, { "epoch": 0.743912229060744, "grad_norm": 24.282846450805664, "learning_rate": 2.560877709392561e-07, "loss": 0.3112, "step": 5560 }, { "epoch": 0.7440460262242441, "grad_norm": 30.75560760498047, "learning_rate": 2.5595397377575594e-07, "loss": 0.3273, "step": 5561 }, { "epoch": 0.7441798233877441, "grad_norm": 31.333364486694336, "learning_rate": 2.558201766122558e-07, "loss": 0.332, "step": 5562 }, { "epoch": 0.7443136205512443, "grad_norm": 20.407617568969727, "learning_rate": 2.5568637944875565e-07, "loss": 0.3098, "step": 5563 }, { "epoch": 0.7444474177147444, "grad_norm": 33.73951721191406, "learning_rate": 2.555525822852556e-07, "loss": 0.3863, "step": 5564 }, { "epoch": 0.7445812148782446, "grad_norm": 24.900760650634766, "learning_rate": 2.554187851217554e-07, "loss": 0.3562, "step": 5565 }, { "epoch": 0.7447150120417447, "grad_norm": 33.1551513671875, "learning_rate": 2.5528498795825523e-07, "loss": 0.2918, "step": 5566 }, { "epoch": 0.7448488092052449, "grad_norm": 19.47638511657715, "learning_rate": 2.5515119079475517e-07, "loss": 0.3623, "step": 5567 }, { "epoch": 0.744982606368745, "grad_norm": 22.468103408813477, "learning_rate": 2.55017393631255e-07, "loss": 0.3097, "step": 5568 }, { "epoch": 0.7451164035322451, "grad_norm": 34.550106048583984, "learning_rate": 2.5488359646775487e-07, "loss": 0.4813, "step": 5569 }, { "epoch": 0.7452502006957452, "grad_norm": 31.703462600708008, "learning_rate": 2.547497993042547e-07, "loss": 0.3589, "step": 5570 }, { "epoch": 0.7453839978592454, "grad_norm": 40.72039031982422, "learning_rate": 2.5461600214075463e-07, "loss": 0.2921, "step": 5571 }, { "epoch": 0.7455177950227455, "grad_norm": 24.484403610229492, "learning_rate": 2.5448220497725446e-07, "loss": 0.3118, "step": 5572 }, { "epoch": 0.7456515921862457, "grad_norm": 30.53421401977539, "learning_rate": 2.5434840781375434e-07, "loss": 0.3884, "step": 5573 }, { "epoch": 0.7457853893497458, "grad_norm": 31.907089233398438, "learning_rate": 2.5421461065025416e-07, "loss": 0.295, "step": 5574 }, { "epoch": 0.745919186513246, "grad_norm": 32.571434020996094, "learning_rate": 2.540808134867541e-07, "loss": 0.422, "step": 5575 }, { "epoch": 0.7460529836767461, "grad_norm": 28.9526309967041, "learning_rate": 2.539470163232539e-07, "loss": 0.3536, "step": 5576 }, { "epoch": 0.7461867808402461, "grad_norm": 34.77292251586914, "learning_rate": 2.538132191597538e-07, "loss": 0.5636, "step": 5577 }, { "epoch": 0.7463205780037463, "grad_norm": 21.953229904174805, "learning_rate": 2.536794219962537e-07, "loss": 0.3356, "step": 5578 }, { "epoch": 0.7464543751672464, "grad_norm": 21.873414993286133, "learning_rate": 2.5354562483275356e-07, "loss": 0.3307, "step": 5579 }, { "epoch": 0.7465881723307466, "grad_norm": 45.07708740234375, "learning_rate": 2.534118276692534e-07, "loss": 0.4884, "step": 5580 }, { "epoch": 0.7467219694942467, "grad_norm": 15.216187477111816, "learning_rate": 2.5327803050575326e-07, "loss": 0.2313, "step": 5581 }, { "epoch": 0.7468557666577469, "grad_norm": 37.10202407836914, "learning_rate": 2.5314423334225314e-07, "loss": 0.293, "step": 5582 }, { "epoch": 0.746989563821247, "grad_norm": 18.281551361083984, "learning_rate": 2.53010436178753e-07, "loss": 0.2381, "step": 5583 }, { "epoch": 0.7471233609847471, "grad_norm": 37.3985481262207, "learning_rate": 2.5287663901525285e-07, "loss": 0.4067, "step": 5584 }, { "epoch": 0.7472571581482472, "grad_norm": 51.15666198730469, "learning_rate": 2.5274284185175273e-07, "loss": 0.2748, "step": 5585 }, { "epoch": 0.7473909553117474, "grad_norm": 39.45109939575195, "learning_rate": 2.526090446882526e-07, "loss": 0.6493, "step": 5586 }, { "epoch": 0.7475247524752475, "grad_norm": 23.74864387512207, "learning_rate": 2.524752475247525e-07, "loss": 0.3364, "step": 5587 }, { "epoch": 0.7476585496387477, "grad_norm": 41.30481719970703, "learning_rate": 2.523414503612523e-07, "loss": 0.2099, "step": 5588 }, { "epoch": 0.7477923468022478, "grad_norm": 38.622779846191406, "learning_rate": 2.5220765319775225e-07, "loss": 0.6284, "step": 5589 }, { "epoch": 0.747926143965748, "grad_norm": 30.873130798339844, "learning_rate": 2.5207385603425207e-07, "loss": 0.4655, "step": 5590 }, { "epoch": 0.748059941129248, "grad_norm": 32.235530853271484, "learning_rate": 2.5194005887075195e-07, "loss": 0.3985, "step": 5591 }, { "epoch": 0.7481937382927482, "grad_norm": 26.55544662475586, "learning_rate": 2.518062617072518e-07, "loss": 0.461, "step": 5592 }, { "epoch": 0.7483275354562483, "grad_norm": 51.22807312011719, "learning_rate": 2.516724645437517e-07, "loss": 0.5994, "step": 5593 }, { "epoch": 0.7484613326197485, "grad_norm": 25.50409507751465, "learning_rate": 2.5153866738025154e-07, "loss": 0.2492, "step": 5594 }, { "epoch": 0.7485951297832486, "grad_norm": 41.19837188720703, "learning_rate": 2.5140487021675136e-07, "loss": 0.5257, "step": 5595 }, { "epoch": 0.7487289269467488, "grad_norm": 27.220964431762695, "learning_rate": 2.5127107305325124e-07, "loss": 0.21, "step": 5596 }, { "epoch": 0.7488627241102489, "grad_norm": 24.75298309326172, "learning_rate": 2.511372758897511e-07, "loss": 0.2287, "step": 5597 }, { "epoch": 0.748996521273749, "grad_norm": 46.91367721557617, "learning_rate": 2.51003478726251e-07, "loss": 0.5193, "step": 5598 }, { "epoch": 0.7491303184372491, "grad_norm": 21.50592613220215, "learning_rate": 2.508696815627508e-07, "loss": 0.2293, "step": 5599 }, { "epoch": 0.7492641156007492, "grad_norm": 28.343793869018555, "learning_rate": 2.5073588439925076e-07, "loss": 0.3277, "step": 5600 }, { "epoch": 0.7493979127642494, "grad_norm": 51.23619842529297, "learning_rate": 2.506020872357506e-07, "loss": 0.511, "step": 5601 }, { "epoch": 0.7495317099277495, "grad_norm": 48.988563537597656, "learning_rate": 2.5046829007225046e-07, "loss": 0.5896, "step": 5602 }, { "epoch": 0.7496655070912497, "grad_norm": 28.17780876159668, "learning_rate": 2.503344929087503e-07, "loss": 0.4352, "step": 5603 }, { "epoch": 0.7497993042547498, "grad_norm": 67.99382781982422, "learning_rate": 2.502006957452502e-07, "loss": 0.7686, "step": 5604 }, { "epoch": 0.74993310141825, "grad_norm": 36.34639358520508, "learning_rate": 2.5006689858175005e-07, "loss": 0.3644, "step": 5605 }, { "epoch": 0.75006689858175, "grad_norm": 27.06070899963379, "learning_rate": 2.4993310141824993e-07, "loss": 0.2439, "step": 5606 }, { "epoch": 0.7502006957452502, "grad_norm": 30.166078567504883, "learning_rate": 2.497993042547498e-07, "loss": 0.3819, "step": 5607 }, { "epoch": 0.7503344929087503, "grad_norm": 49.69041442871094, "learning_rate": 2.4966550709124963e-07, "loss": 0.5134, "step": 5608 }, { "epoch": 0.7504682900722505, "grad_norm": 28.341154098510742, "learning_rate": 2.495317099277495e-07, "loss": 0.3357, "step": 5609 }, { "epoch": 0.7506020872357506, "grad_norm": 23.568178176879883, "learning_rate": 2.493979127642494e-07, "loss": 0.2708, "step": 5610 }, { "epoch": 0.7507358843992508, "grad_norm": 38.13457489013672, "learning_rate": 2.4926411560074927e-07, "loss": 0.3029, "step": 5611 }, { "epoch": 0.7508696815627509, "grad_norm": 14.845688819885254, "learning_rate": 2.4913031843724915e-07, "loss": 0.1404, "step": 5612 }, { "epoch": 0.751003478726251, "grad_norm": 55.19417190551758, "learning_rate": 2.48996521273749e-07, "loss": 0.6934, "step": 5613 }, { "epoch": 0.7511372758897511, "grad_norm": 22.67266845703125, "learning_rate": 2.4886272411024886e-07, "loss": 0.2389, "step": 5614 }, { "epoch": 0.7512710730532512, "grad_norm": 32.924598693847656, "learning_rate": 2.4872892694674874e-07, "loss": 0.4203, "step": 5615 }, { "epoch": 0.7514048702167514, "grad_norm": 20.26316261291504, "learning_rate": 2.485951297832486e-07, "loss": 0.2328, "step": 5616 }, { "epoch": 0.7515386673802515, "grad_norm": 30.659191131591797, "learning_rate": 2.4846133261974844e-07, "loss": 0.4847, "step": 5617 }, { "epoch": 0.7516724645437517, "grad_norm": 32.58921432495117, "learning_rate": 2.483275354562483e-07, "loss": 0.3923, "step": 5618 }, { "epoch": 0.7518062617072518, "grad_norm": 19.9078369140625, "learning_rate": 2.4819373829274815e-07, "loss": 0.2626, "step": 5619 }, { "epoch": 0.751940058870752, "grad_norm": 41.776371002197266, "learning_rate": 2.48059941129248e-07, "loss": 0.4015, "step": 5620 }, { "epoch": 0.752073856034252, "grad_norm": 29.262813568115234, "learning_rate": 2.479261439657479e-07, "loss": 0.3414, "step": 5621 }, { "epoch": 0.7522076531977522, "grad_norm": 32.17095947265625, "learning_rate": 2.477923468022478e-07, "loss": 0.2593, "step": 5622 }, { "epoch": 0.7523414503612523, "grad_norm": 29.493297576904297, "learning_rate": 2.4765854963874766e-07, "loss": 0.2544, "step": 5623 }, { "epoch": 0.7524752475247525, "grad_norm": 27.80255126953125, "learning_rate": 2.475247524752475e-07, "loss": 0.2181, "step": 5624 }, { "epoch": 0.7526090446882526, "grad_norm": 22.344844818115234, "learning_rate": 2.4739095531174737e-07, "loss": 0.1633, "step": 5625 }, { "epoch": 0.7527428418517528, "grad_norm": 17.596540451049805, "learning_rate": 2.4725715814824725e-07, "loss": 0.1687, "step": 5626 }, { "epoch": 0.7528766390152529, "grad_norm": 22.88265609741211, "learning_rate": 2.4712336098474713e-07, "loss": 0.2359, "step": 5627 }, { "epoch": 0.753010436178753, "grad_norm": 35.68690872192383, "learning_rate": 2.4698956382124695e-07, "loss": 0.3391, "step": 5628 }, { "epoch": 0.7531442333422531, "grad_norm": 31.059537887573242, "learning_rate": 2.4685576665774683e-07, "loss": 0.5478, "step": 5629 }, { "epoch": 0.7532780305057533, "grad_norm": 22.587121963500977, "learning_rate": 2.467219694942467e-07, "loss": 0.261, "step": 5630 }, { "epoch": 0.7534118276692534, "grad_norm": 16.450271606445312, "learning_rate": 2.465881723307466e-07, "loss": 0.2048, "step": 5631 }, { "epoch": 0.7535456248327536, "grad_norm": 39.2324333190918, "learning_rate": 2.464543751672464e-07, "loss": 0.364, "step": 5632 }, { "epoch": 0.7536794219962537, "grad_norm": 63.19227981567383, "learning_rate": 2.463205780037463e-07, "loss": 0.7478, "step": 5633 }, { "epoch": 0.7538132191597539, "grad_norm": 27.45780372619629, "learning_rate": 2.461867808402462e-07, "loss": 0.3763, "step": 5634 }, { "epoch": 0.7539470163232539, "grad_norm": 43.72163391113281, "learning_rate": 2.4605298367674606e-07, "loss": 0.4607, "step": 5635 }, { "epoch": 0.754080813486754, "grad_norm": 39.38627243041992, "learning_rate": 2.4591918651324594e-07, "loss": 0.1765, "step": 5636 }, { "epoch": 0.7542146106502542, "grad_norm": 33.26155090332031, "learning_rate": 2.4578538934974576e-07, "loss": 0.3006, "step": 5637 }, { "epoch": 0.7543484078137543, "grad_norm": 42.622596740722656, "learning_rate": 2.4565159218624564e-07, "loss": 0.4811, "step": 5638 }, { "epoch": 0.7544822049772545, "grad_norm": 35.37136459350586, "learning_rate": 2.455177950227455e-07, "loss": 0.4254, "step": 5639 }, { "epoch": 0.7546160021407546, "grad_norm": 44.39422607421875, "learning_rate": 2.453839978592454e-07, "loss": 0.4489, "step": 5640 }, { "epoch": 0.7547497993042548, "grad_norm": 29.618457794189453, "learning_rate": 2.452502006957452e-07, "loss": 0.3529, "step": 5641 }, { "epoch": 0.7548835964677549, "grad_norm": 21.677705764770508, "learning_rate": 2.451164035322451e-07, "loss": 0.3285, "step": 5642 }, { "epoch": 0.755017393631255, "grad_norm": 39.57292938232422, "learning_rate": 2.44982606368745e-07, "loss": 0.3198, "step": 5643 }, { "epoch": 0.7551511907947551, "grad_norm": 32.705684661865234, "learning_rate": 2.4484880920524486e-07, "loss": 0.3482, "step": 5644 }, { "epoch": 0.7552849879582553, "grad_norm": 37.01752853393555, "learning_rate": 2.4471501204174474e-07, "loss": 0.3502, "step": 5645 }, { "epoch": 0.7554187851217554, "grad_norm": 36.926979064941406, "learning_rate": 2.4458121487824457e-07, "loss": 0.5562, "step": 5646 }, { "epoch": 0.7555525822852556, "grad_norm": 52.24563217163086, "learning_rate": 2.4444741771474445e-07, "loss": 0.2443, "step": 5647 }, { "epoch": 0.7556863794487557, "grad_norm": 54.63786697387695, "learning_rate": 2.443136205512443e-07, "loss": 0.4324, "step": 5648 }, { "epoch": 0.7558201766122559, "grad_norm": 51.58677291870117, "learning_rate": 2.4417982338774415e-07, "loss": 0.6034, "step": 5649 }, { "epoch": 0.7559539737757559, "grad_norm": 35.50999069213867, "learning_rate": 2.4404602622424403e-07, "loss": 0.5143, "step": 5650 }, { "epoch": 0.756087770939256, "grad_norm": 40.498741149902344, "learning_rate": 2.439122290607439e-07, "loss": 0.3912, "step": 5651 }, { "epoch": 0.7562215681027562, "grad_norm": 34.75751876831055, "learning_rate": 2.4377843189724374e-07, "loss": 0.4243, "step": 5652 }, { "epoch": 0.7563553652662564, "grad_norm": 27.698015213012695, "learning_rate": 2.436446347337436e-07, "loss": 0.3405, "step": 5653 }, { "epoch": 0.7564891624297565, "grad_norm": 33.41348648071289, "learning_rate": 2.435108375702435e-07, "loss": 0.3248, "step": 5654 }, { "epoch": 0.7566229595932566, "grad_norm": 42.09619903564453, "learning_rate": 2.433770404067434e-07, "loss": 0.611, "step": 5655 }, { "epoch": 0.7567567567567568, "grad_norm": 32.3985481262207, "learning_rate": 2.4324324324324326e-07, "loss": 0.4362, "step": 5656 }, { "epoch": 0.7568905539202568, "grad_norm": 42.474021911621094, "learning_rate": 2.431094460797431e-07, "loss": 0.3624, "step": 5657 }, { "epoch": 0.757024351083757, "grad_norm": 44.85150909423828, "learning_rate": 2.4297564891624296e-07, "loss": 0.5948, "step": 5658 }, { "epoch": 0.7571581482472571, "grad_norm": 50.98122787475586, "learning_rate": 2.4284185175274284e-07, "loss": 0.3673, "step": 5659 }, { "epoch": 0.7572919454107573, "grad_norm": 45.44392013549805, "learning_rate": 2.427080545892427e-07, "loss": 0.3508, "step": 5660 }, { "epoch": 0.7574257425742574, "grad_norm": 31.487396240234375, "learning_rate": 2.4257425742574255e-07, "loss": 0.4218, "step": 5661 }, { "epoch": 0.7575595397377576, "grad_norm": 38.81792068481445, "learning_rate": 2.424404602622424e-07, "loss": 0.4611, "step": 5662 }, { "epoch": 0.7576933369012577, "grad_norm": 44.482078552246094, "learning_rate": 2.423066630987423e-07, "loss": 0.5725, "step": 5663 }, { "epoch": 0.7578271340647579, "grad_norm": 45.369346618652344, "learning_rate": 2.421728659352422e-07, "loss": 0.2303, "step": 5664 }, { "epoch": 0.7579609312282579, "grad_norm": 44.02102279663086, "learning_rate": 2.42039068771742e-07, "loss": 0.5601, "step": 5665 }, { "epoch": 0.7580947283917581, "grad_norm": 36.79574966430664, "learning_rate": 2.419052716082419e-07, "loss": 0.2275, "step": 5666 }, { "epoch": 0.7582285255552582, "grad_norm": 31.724428176879883, "learning_rate": 2.4177147444474177e-07, "loss": 0.587, "step": 5667 }, { "epoch": 0.7583623227187584, "grad_norm": 79.05868530273438, "learning_rate": 2.4163767728124165e-07, "loss": 0.603, "step": 5668 }, { "epoch": 0.7584961198822585, "grad_norm": 35.73981475830078, "learning_rate": 2.4150388011774153e-07, "loss": 0.4108, "step": 5669 }, { "epoch": 0.7586299170457587, "grad_norm": 37.15681457519531, "learning_rate": 2.4137008295424135e-07, "loss": 0.5013, "step": 5670 }, { "epoch": 0.7587637142092588, "grad_norm": 30.185272216796875, "learning_rate": 2.4123628579074123e-07, "loss": 0.3122, "step": 5671 }, { "epoch": 0.7588975113727588, "grad_norm": 51.12543869018555, "learning_rate": 2.4110248862724106e-07, "loss": 0.5794, "step": 5672 }, { "epoch": 0.759031308536259, "grad_norm": 25.292394638061523, "learning_rate": 2.4096869146374094e-07, "loss": 0.3667, "step": 5673 }, { "epoch": 0.7591651056997591, "grad_norm": 42.51786422729492, "learning_rate": 2.408348943002408e-07, "loss": 0.496, "step": 5674 }, { "epoch": 0.7592989028632593, "grad_norm": 31.71390724182129, "learning_rate": 2.407010971367407e-07, "loss": 0.3975, "step": 5675 }, { "epoch": 0.7594327000267594, "grad_norm": 21.42453956604004, "learning_rate": 2.405672999732405e-07, "loss": 0.3831, "step": 5676 }, { "epoch": 0.7595664971902596, "grad_norm": 58.70012283325195, "learning_rate": 2.404335028097404e-07, "loss": 0.3426, "step": 5677 }, { "epoch": 0.7597002943537597, "grad_norm": 26.880096435546875, "learning_rate": 2.402997056462403e-07, "loss": 0.2232, "step": 5678 }, { "epoch": 0.7598340915172598, "grad_norm": 30.54808807373047, "learning_rate": 2.4016590848274016e-07, "loss": 0.3228, "step": 5679 }, { "epoch": 0.7599678886807599, "grad_norm": 30.452302932739258, "learning_rate": 2.4003211131924004e-07, "loss": 0.2823, "step": 5680 }, { "epoch": 0.7601016858442601, "grad_norm": 28.76277732849121, "learning_rate": 2.3989831415573987e-07, "loss": 0.3354, "step": 5681 }, { "epoch": 0.7602354830077602, "grad_norm": 36.312416076660156, "learning_rate": 2.3976451699223975e-07, "loss": 0.5083, "step": 5682 }, { "epoch": 0.7603692801712604, "grad_norm": 34.588294982910156, "learning_rate": 2.396307198287396e-07, "loss": 0.4138, "step": 5683 }, { "epoch": 0.7605030773347605, "grad_norm": 26.950645446777344, "learning_rate": 2.394969226652395e-07, "loss": 0.4451, "step": 5684 }, { "epoch": 0.7606368744982607, "grad_norm": 26.986494064331055, "learning_rate": 2.3936312550173933e-07, "loss": 0.3615, "step": 5685 }, { "epoch": 0.7607706716617608, "grad_norm": 21.488479614257812, "learning_rate": 2.392293283382392e-07, "loss": 0.2289, "step": 5686 }, { "epoch": 0.7609044688252609, "grad_norm": 26.273847579956055, "learning_rate": 2.390955311747391e-07, "loss": 0.202, "step": 5687 }, { "epoch": 0.761038265988761, "grad_norm": 45.679019927978516, "learning_rate": 2.3896173401123897e-07, "loss": 0.4185, "step": 5688 }, { "epoch": 0.7611720631522612, "grad_norm": 44.46140670776367, "learning_rate": 2.388279368477388e-07, "loss": 0.4246, "step": 5689 }, { "epoch": 0.7613058603157613, "grad_norm": 16.22149085998535, "learning_rate": 2.386941396842387e-07, "loss": 0.1349, "step": 5690 }, { "epoch": 0.7614396574792615, "grad_norm": 24.689655303955078, "learning_rate": 2.3856034252073855e-07, "loss": 0.3693, "step": 5691 }, { "epoch": 0.7615734546427616, "grad_norm": 39.979557037353516, "learning_rate": 2.3842654535723843e-07, "loss": 0.2919, "step": 5692 }, { "epoch": 0.7617072518062618, "grad_norm": 24.71870231628418, "learning_rate": 2.3829274819373829e-07, "loss": 0.2864, "step": 5693 }, { "epoch": 0.7618410489697618, "grad_norm": 37.59667205810547, "learning_rate": 2.3815895103023814e-07, "loss": 0.3732, "step": 5694 }, { "epoch": 0.7619748461332619, "grad_norm": 45.23374557495117, "learning_rate": 2.3802515386673802e-07, "loss": 0.547, "step": 5695 }, { "epoch": 0.7621086432967621, "grad_norm": 33.60671615600586, "learning_rate": 2.3789135670323787e-07, "loss": 0.4229, "step": 5696 }, { "epoch": 0.7622424404602622, "grad_norm": 43.37024688720703, "learning_rate": 2.3775755953973775e-07, "loss": 0.4692, "step": 5697 }, { "epoch": 0.7623762376237624, "grad_norm": 58.89657974243164, "learning_rate": 2.376237623762376e-07, "loss": 0.7037, "step": 5698 }, { "epoch": 0.7625100347872625, "grad_norm": 19.302175521850586, "learning_rate": 2.3748996521273748e-07, "loss": 0.2255, "step": 5699 }, { "epoch": 0.7626438319507627, "grad_norm": 50.5054817199707, "learning_rate": 2.3735616804923734e-07, "loss": 0.5129, "step": 5700 }, { "epoch": 0.7627776291142627, "grad_norm": 47.032405853271484, "learning_rate": 2.3722237088573721e-07, "loss": 0.3271, "step": 5701 }, { "epoch": 0.7629114262777629, "grad_norm": 38.571556091308594, "learning_rate": 2.370885737222371e-07, "loss": 0.3515, "step": 5702 }, { "epoch": 0.763045223441263, "grad_norm": 52.751338958740234, "learning_rate": 2.3695477655873695e-07, "loss": 0.463, "step": 5703 }, { "epoch": 0.7631790206047632, "grad_norm": 32.71303939819336, "learning_rate": 2.3682097939523683e-07, "loss": 0.344, "step": 5704 }, { "epoch": 0.7633128177682633, "grad_norm": 51.325103759765625, "learning_rate": 2.3668718223173668e-07, "loss": 0.5128, "step": 5705 }, { "epoch": 0.7634466149317635, "grad_norm": 38.08867645263672, "learning_rate": 2.3655338506823656e-07, "loss": 0.4539, "step": 5706 }, { "epoch": 0.7635804120952636, "grad_norm": 21.905746459960938, "learning_rate": 2.364195879047364e-07, "loss": 0.2796, "step": 5707 }, { "epoch": 0.7637142092587638, "grad_norm": 25.540504455566406, "learning_rate": 2.362857907412363e-07, "loss": 0.3206, "step": 5708 }, { "epoch": 0.7638480064222638, "grad_norm": 69.96965026855469, "learning_rate": 2.3615199357773614e-07, "loss": 0.4867, "step": 5709 }, { "epoch": 0.763981803585764, "grad_norm": 35.796653747558594, "learning_rate": 2.3601819641423602e-07, "loss": 0.3984, "step": 5710 }, { "epoch": 0.7641156007492641, "grad_norm": 44.26872253417969, "learning_rate": 2.3588439925073585e-07, "loss": 0.4243, "step": 5711 }, { "epoch": 0.7642493979127643, "grad_norm": 26.67279815673828, "learning_rate": 2.3575060208723573e-07, "loss": 0.3709, "step": 5712 }, { "epoch": 0.7643831950762644, "grad_norm": 43.913631439208984, "learning_rate": 2.356168049237356e-07, "loss": 0.313, "step": 5713 }, { "epoch": 0.7645169922397645, "grad_norm": 41.69340896606445, "learning_rate": 2.3548300776023546e-07, "loss": 0.5141, "step": 5714 }, { "epoch": 0.7646507894032647, "grad_norm": 29.51202392578125, "learning_rate": 2.3534921059673534e-07, "loss": 0.2394, "step": 5715 }, { "epoch": 0.7647845865667647, "grad_norm": 42.03524398803711, "learning_rate": 2.352154134332352e-07, "loss": 0.4283, "step": 5716 }, { "epoch": 0.7649183837302649, "grad_norm": 26.511268615722656, "learning_rate": 2.3508161626973507e-07, "loss": 0.235, "step": 5717 }, { "epoch": 0.765052180893765, "grad_norm": 37.43805694580078, "learning_rate": 2.3494781910623492e-07, "loss": 0.5957, "step": 5718 }, { "epoch": 0.7651859780572652, "grad_norm": 36.957794189453125, "learning_rate": 2.348140219427348e-07, "loss": 0.4604, "step": 5719 }, { "epoch": 0.7653197752207653, "grad_norm": 28.531036376953125, "learning_rate": 2.3468022477923466e-07, "loss": 0.3125, "step": 5720 }, { "epoch": 0.7654535723842655, "grad_norm": 23.413772583007812, "learning_rate": 2.3454642761573454e-07, "loss": 0.2434, "step": 5721 }, { "epoch": 0.7655873695477656, "grad_norm": 39.95848846435547, "learning_rate": 2.344126304522344e-07, "loss": 0.3822, "step": 5722 }, { "epoch": 0.7657211667112657, "grad_norm": 36.511600494384766, "learning_rate": 2.3427883328873427e-07, "loss": 0.3577, "step": 5723 }, { "epoch": 0.7658549638747658, "grad_norm": 22.311843872070312, "learning_rate": 2.3414503612523415e-07, "loss": 0.3258, "step": 5724 }, { "epoch": 0.765988761038266, "grad_norm": 21.445682525634766, "learning_rate": 2.34011238961734e-07, "loss": 0.2644, "step": 5725 }, { "epoch": 0.7661225582017661, "grad_norm": 30.009796142578125, "learning_rate": 2.3387744179823388e-07, "loss": 0.3083, "step": 5726 }, { "epoch": 0.7662563553652663, "grad_norm": 54.349525451660156, "learning_rate": 2.3374364463473373e-07, "loss": 0.6663, "step": 5727 }, { "epoch": 0.7663901525287664, "grad_norm": 38.182167053222656, "learning_rate": 2.336098474712336e-07, "loss": 0.3531, "step": 5728 }, { "epoch": 0.7665239496922666, "grad_norm": 30.711307525634766, "learning_rate": 2.3347605030773346e-07, "loss": 0.4036, "step": 5729 }, { "epoch": 0.7666577468557667, "grad_norm": 35.98990249633789, "learning_rate": 2.3334225314423334e-07, "loss": 0.5396, "step": 5730 }, { "epoch": 0.7667915440192667, "grad_norm": 31.510547637939453, "learning_rate": 2.332084559807332e-07, "loss": 0.2338, "step": 5731 }, { "epoch": 0.7669253411827669, "grad_norm": 36.64936828613281, "learning_rate": 2.3307465881723308e-07, "loss": 0.279, "step": 5732 }, { "epoch": 0.767059138346267, "grad_norm": 29.393741607666016, "learning_rate": 2.3294086165373293e-07, "loss": 0.1902, "step": 5733 }, { "epoch": 0.7671929355097672, "grad_norm": 33.56319046020508, "learning_rate": 2.328070644902328e-07, "loss": 0.4722, "step": 5734 }, { "epoch": 0.7673267326732673, "grad_norm": 47.38768768310547, "learning_rate": 2.3267326732673269e-07, "loss": 0.5479, "step": 5735 }, { "epoch": 0.7674605298367675, "grad_norm": 31.48980140686035, "learning_rate": 2.3253947016323254e-07, "loss": 0.3702, "step": 5736 }, { "epoch": 0.7675943270002676, "grad_norm": 43.10712432861328, "learning_rate": 2.324056729997324e-07, "loss": 0.3825, "step": 5737 }, { "epoch": 0.7677281241637677, "grad_norm": 36.63046646118164, "learning_rate": 2.3227187583623224e-07, "loss": 0.3921, "step": 5738 }, { "epoch": 0.7678619213272678, "grad_norm": 23.945098876953125, "learning_rate": 2.3213807867273212e-07, "loss": 0.2819, "step": 5739 }, { "epoch": 0.767995718490768, "grad_norm": 42.28912353515625, "learning_rate": 2.3200428150923198e-07, "loss": 0.4329, "step": 5740 }, { "epoch": 0.7681295156542681, "grad_norm": 38.282833099365234, "learning_rate": 2.3187048434573186e-07, "loss": 0.3203, "step": 5741 }, { "epoch": 0.7682633128177683, "grad_norm": 49.49285888671875, "learning_rate": 2.317366871822317e-07, "loss": 0.6768, "step": 5742 }, { "epoch": 0.7683971099812684, "grad_norm": 29.53409767150879, "learning_rate": 2.316028900187316e-07, "loss": 0.2973, "step": 5743 }, { "epoch": 0.7685309071447686, "grad_norm": 46.659610748291016, "learning_rate": 2.3146909285523144e-07, "loss": 0.5084, "step": 5744 }, { "epoch": 0.7686647043082686, "grad_norm": 23.27121353149414, "learning_rate": 2.3133529569173132e-07, "loss": 0.3435, "step": 5745 }, { "epoch": 0.7687985014717688, "grad_norm": 36.13096237182617, "learning_rate": 2.312014985282312e-07, "loss": 0.2821, "step": 5746 }, { "epoch": 0.7689322986352689, "grad_norm": 36.02667999267578, "learning_rate": 2.3106770136473105e-07, "loss": 0.1971, "step": 5747 }, { "epoch": 0.7690660957987691, "grad_norm": 31.79111671447754, "learning_rate": 2.3093390420123093e-07, "loss": 0.5116, "step": 5748 }, { "epoch": 0.7691998929622692, "grad_norm": 23.867881774902344, "learning_rate": 2.3080010703773078e-07, "loss": 0.2981, "step": 5749 }, { "epoch": 0.7693336901257694, "grad_norm": 34.02930450439453, "learning_rate": 2.3066630987423066e-07, "loss": 0.4373, "step": 5750 }, { "epoch": 0.7694674872892695, "grad_norm": 29.312440872192383, "learning_rate": 2.3053251271073052e-07, "loss": 0.5228, "step": 5751 }, { "epoch": 0.7696012844527697, "grad_norm": 21.969486236572266, "learning_rate": 2.303987155472304e-07, "loss": 0.2727, "step": 5752 }, { "epoch": 0.7697350816162697, "grad_norm": 36.751468658447266, "learning_rate": 2.3026491838373025e-07, "loss": 0.5457, "step": 5753 }, { "epoch": 0.7698688787797698, "grad_norm": 43.1298828125, "learning_rate": 2.3013112122023013e-07, "loss": 0.4428, "step": 5754 }, { "epoch": 0.77000267594327, "grad_norm": 47.70954895019531, "learning_rate": 2.2999732405672998e-07, "loss": 0.3401, "step": 5755 }, { "epoch": 0.7701364731067701, "grad_norm": 47.94169235229492, "learning_rate": 2.2986352689322986e-07, "loss": 0.6212, "step": 5756 }, { "epoch": 0.7702702702702703, "grad_norm": 32.401283264160156, "learning_rate": 2.2972972972972974e-07, "loss": 0.4132, "step": 5757 }, { "epoch": 0.7704040674337704, "grad_norm": 33.90913391113281, "learning_rate": 2.295959325662296e-07, "loss": 0.3933, "step": 5758 }, { "epoch": 0.7705378645972706, "grad_norm": 37.72578048706055, "learning_rate": 2.2946213540272947e-07, "loss": 0.5003, "step": 5759 }, { "epoch": 0.7706716617607706, "grad_norm": 33.09278869628906, "learning_rate": 2.2932833823922932e-07, "loss": 0.4496, "step": 5760 }, { "epoch": 0.7708054589242708, "grad_norm": 45.57575607299805, "learning_rate": 2.291945410757292e-07, "loss": 0.4745, "step": 5761 }, { "epoch": 0.7709392560877709, "grad_norm": 30.40062713623047, "learning_rate": 2.2906074391222903e-07, "loss": 0.4391, "step": 5762 }, { "epoch": 0.7710730532512711, "grad_norm": 32.089881896972656, "learning_rate": 2.289269467487289e-07, "loss": 0.3636, "step": 5763 }, { "epoch": 0.7712068504147712, "grad_norm": 49.2653923034668, "learning_rate": 2.2879314958522876e-07, "loss": 0.3301, "step": 5764 }, { "epoch": 0.7713406475782714, "grad_norm": 31.327831268310547, "learning_rate": 2.2865935242172864e-07, "loss": 0.44, "step": 5765 }, { "epoch": 0.7714744447417715, "grad_norm": 39.338623046875, "learning_rate": 2.285255552582285e-07, "loss": 0.4727, "step": 5766 }, { "epoch": 0.7716082419052717, "grad_norm": 37.08074188232422, "learning_rate": 2.2839175809472837e-07, "loss": 0.4319, "step": 5767 }, { "epoch": 0.7717420390687717, "grad_norm": 46.88719177246094, "learning_rate": 2.2825796093122825e-07, "loss": 0.704, "step": 5768 }, { "epoch": 0.7718758362322719, "grad_norm": 25.890003204345703, "learning_rate": 2.281241637677281e-07, "loss": 0.2877, "step": 5769 }, { "epoch": 0.772009633395772, "grad_norm": 58.910640716552734, "learning_rate": 2.2799036660422798e-07, "loss": 0.6596, "step": 5770 }, { "epoch": 0.7721434305592721, "grad_norm": 34.88795471191406, "learning_rate": 2.2785656944072784e-07, "loss": 0.2366, "step": 5771 }, { "epoch": 0.7722772277227723, "grad_norm": 29.348587036132812, "learning_rate": 2.2772277227722772e-07, "loss": 0.2775, "step": 5772 }, { "epoch": 0.7724110248862724, "grad_norm": 27.037845611572266, "learning_rate": 2.2758897511372757e-07, "loss": 0.2709, "step": 5773 }, { "epoch": 0.7725448220497726, "grad_norm": 23.50355339050293, "learning_rate": 2.2745517795022745e-07, "loss": 0.3074, "step": 5774 }, { "epoch": 0.7726786192132726, "grad_norm": 48.20639419555664, "learning_rate": 2.273213807867273e-07, "loss": 0.4569, "step": 5775 }, { "epoch": 0.7728124163767728, "grad_norm": 23.710224151611328, "learning_rate": 2.2718758362322718e-07, "loss": 0.3884, "step": 5776 }, { "epoch": 0.7729462135402729, "grad_norm": 28.857797622680664, "learning_rate": 2.2705378645972703e-07, "loss": 0.3778, "step": 5777 }, { "epoch": 0.7730800107037731, "grad_norm": 27.103574752807617, "learning_rate": 2.269199892962269e-07, "loss": 0.2847, "step": 5778 }, { "epoch": 0.7732138078672732, "grad_norm": 23.48691177368164, "learning_rate": 2.267861921327268e-07, "loss": 0.2575, "step": 5779 }, { "epoch": 0.7733476050307734, "grad_norm": 59.24025344848633, "learning_rate": 2.2665239496922664e-07, "loss": 0.4957, "step": 5780 }, { "epoch": 0.7734814021942735, "grad_norm": 38.48776626586914, "learning_rate": 2.2651859780572652e-07, "loss": 0.3386, "step": 5781 }, { "epoch": 0.7736151993577736, "grad_norm": 31.320493698120117, "learning_rate": 2.2638480064222638e-07, "loss": 0.3887, "step": 5782 }, { "epoch": 0.7737489965212737, "grad_norm": 31.463539123535156, "learning_rate": 2.2625100347872626e-07, "loss": 0.2267, "step": 5783 }, { "epoch": 0.7738827936847739, "grad_norm": 39.77897262573242, "learning_rate": 2.261172063152261e-07, "loss": 0.2565, "step": 5784 }, { "epoch": 0.774016590848274, "grad_norm": 42.52228546142578, "learning_rate": 2.25983409151726e-07, "loss": 0.3927, "step": 5785 }, { "epoch": 0.7741503880117742, "grad_norm": 40.00215148925781, "learning_rate": 2.2584961198822584e-07, "loss": 0.5307, "step": 5786 }, { "epoch": 0.7742841851752743, "grad_norm": 38.2355842590332, "learning_rate": 2.2571581482472572e-07, "loss": 0.4488, "step": 5787 }, { "epoch": 0.7744179823387745, "grad_norm": 26.71662139892578, "learning_rate": 2.2558201766122555e-07, "loss": 0.3491, "step": 5788 }, { "epoch": 0.7745517795022746, "grad_norm": 24.608610153198242, "learning_rate": 2.2544822049772543e-07, "loss": 0.291, "step": 5789 }, { "epoch": 0.7746855766657746, "grad_norm": 43.77129364013672, "learning_rate": 2.253144233342253e-07, "loss": 0.2245, "step": 5790 }, { "epoch": 0.7748193738292748, "grad_norm": 22.952037811279297, "learning_rate": 2.2518062617072516e-07, "loss": 0.2024, "step": 5791 }, { "epoch": 0.7749531709927749, "grad_norm": 36.50046920776367, "learning_rate": 2.2504682900722504e-07, "loss": 0.5577, "step": 5792 }, { "epoch": 0.7750869681562751, "grad_norm": 43.88094711303711, "learning_rate": 2.249130318437249e-07, "loss": 0.4938, "step": 5793 }, { "epoch": 0.7752207653197752, "grad_norm": 39.039451599121094, "learning_rate": 2.2477923468022477e-07, "loss": 0.2872, "step": 5794 }, { "epoch": 0.7753545624832754, "grad_norm": 31.1230411529541, "learning_rate": 2.2464543751672462e-07, "loss": 0.3304, "step": 5795 }, { "epoch": 0.7754883596467755, "grad_norm": 39.65534973144531, "learning_rate": 2.245116403532245e-07, "loss": 0.3426, "step": 5796 }, { "epoch": 0.7756221568102756, "grad_norm": 56.57108688354492, "learning_rate": 2.2437784318972435e-07, "loss": 0.5579, "step": 5797 }, { "epoch": 0.7757559539737757, "grad_norm": 54.378108978271484, "learning_rate": 2.2424404602622423e-07, "loss": 0.4584, "step": 5798 }, { "epoch": 0.7758897511372759, "grad_norm": 43.57553482055664, "learning_rate": 2.2411024886272409e-07, "loss": 0.4113, "step": 5799 }, { "epoch": 0.776023548300776, "grad_norm": 41.57503890991211, "learning_rate": 2.2397645169922397e-07, "loss": 0.378, "step": 5800 }, { "epoch": 0.7761573454642762, "grad_norm": 33.537418365478516, "learning_rate": 2.2384265453572384e-07, "loss": 0.2107, "step": 5801 }, { "epoch": 0.7762911426277763, "grad_norm": 34.034000396728516, "learning_rate": 2.237088573722237e-07, "loss": 0.3483, "step": 5802 }, { "epoch": 0.7764249397912765, "grad_norm": 48.62044143676758, "learning_rate": 2.2357506020872358e-07, "loss": 0.4838, "step": 5803 }, { "epoch": 0.7765587369547765, "grad_norm": 44.40589141845703, "learning_rate": 2.2344126304522343e-07, "loss": 0.4126, "step": 5804 }, { "epoch": 0.7766925341182767, "grad_norm": 35.42634582519531, "learning_rate": 2.233074658817233e-07, "loss": 0.4324, "step": 5805 }, { "epoch": 0.7768263312817768, "grad_norm": 31.427249908447266, "learning_rate": 2.2317366871822316e-07, "loss": 0.5352, "step": 5806 }, { "epoch": 0.776960128445277, "grad_norm": 23.05181312561035, "learning_rate": 2.2303987155472304e-07, "loss": 0.2631, "step": 5807 }, { "epoch": 0.7770939256087771, "grad_norm": 32.36771011352539, "learning_rate": 2.229060743912229e-07, "loss": 0.3235, "step": 5808 }, { "epoch": 0.7772277227722773, "grad_norm": 39.719974517822266, "learning_rate": 2.2277227722772277e-07, "loss": 0.4005, "step": 5809 }, { "epoch": 0.7773615199357774, "grad_norm": 36.236637115478516, "learning_rate": 2.2263848006422263e-07, "loss": 0.2221, "step": 5810 }, { "epoch": 0.7774953170992775, "grad_norm": 36.077430725097656, "learning_rate": 2.225046829007225e-07, "loss": 0.3802, "step": 5811 }, { "epoch": 0.7776291142627776, "grad_norm": 38.80485534667969, "learning_rate": 2.2237088573722238e-07, "loss": 0.4667, "step": 5812 }, { "epoch": 0.7777629114262777, "grad_norm": 19.796497344970703, "learning_rate": 2.2223708857372224e-07, "loss": 0.2533, "step": 5813 }, { "epoch": 0.7778967085897779, "grad_norm": 34.54215621948242, "learning_rate": 2.2210329141022212e-07, "loss": 0.3084, "step": 5814 }, { "epoch": 0.778030505753278, "grad_norm": 49.84982681274414, "learning_rate": 2.2196949424672194e-07, "loss": 0.5064, "step": 5815 }, { "epoch": 0.7781643029167782, "grad_norm": 58.93018341064453, "learning_rate": 2.2183569708322182e-07, "loss": 0.5283, "step": 5816 }, { "epoch": 0.7782981000802783, "grad_norm": 49.088809967041016, "learning_rate": 2.2170189991972168e-07, "loss": 0.5038, "step": 5817 }, { "epoch": 0.7784318972437785, "grad_norm": 33.4727668762207, "learning_rate": 2.2156810275622155e-07, "loss": 0.2915, "step": 5818 }, { "epoch": 0.7785656944072785, "grad_norm": 50.777259826660156, "learning_rate": 2.214343055927214e-07, "loss": 0.5708, "step": 5819 }, { "epoch": 0.7786994915707787, "grad_norm": 37.98841094970703, "learning_rate": 2.2130050842922129e-07, "loss": 0.2735, "step": 5820 }, { "epoch": 0.7788332887342788, "grad_norm": 20.32459831237793, "learning_rate": 2.2116671126572114e-07, "loss": 0.2035, "step": 5821 }, { "epoch": 0.778967085897779, "grad_norm": 35.211631774902344, "learning_rate": 2.2103291410222102e-07, "loss": 0.3594, "step": 5822 }, { "epoch": 0.7791008830612791, "grad_norm": 28.369020462036133, "learning_rate": 2.2089911693872087e-07, "loss": 0.3189, "step": 5823 }, { "epoch": 0.7792346802247793, "grad_norm": 29.156618118286133, "learning_rate": 2.2076531977522075e-07, "loss": 0.5256, "step": 5824 }, { "epoch": 0.7793684773882794, "grad_norm": 32.497459411621094, "learning_rate": 2.2063152261172063e-07, "loss": 0.3283, "step": 5825 }, { "epoch": 0.7795022745517795, "grad_norm": 27.328632354736328, "learning_rate": 2.2049772544822048e-07, "loss": 0.2814, "step": 5826 }, { "epoch": 0.7796360717152796, "grad_norm": 23.56128692626953, "learning_rate": 2.2036392828472036e-07, "loss": 0.2097, "step": 5827 }, { "epoch": 0.7797698688787797, "grad_norm": 29.57111358642578, "learning_rate": 2.2023013112122021e-07, "loss": 0.3778, "step": 5828 }, { "epoch": 0.7799036660422799, "grad_norm": 27.622339248657227, "learning_rate": 2.200963339577201e-07, "loss": 0.3552, "step": 5829 }, { "epoch": 0.78003746320578, "grad_norm": 38.127281188964844, "learning_rate": 2.1996253679421995e-07, "loss": 0.2693, "step": 5830 }, { "epoch": 0.7801712603692802, "grad_norm": 28.005159378051758, "learning_rate": 2.1982873963071983e-07, "loss": 0.2948, "step": 5831 }, { "epoch": 0.7803050575327803, "grad_norm": 31.1954288482666, "learning_rate": 2.1969494246721968e-07, "loss": 0.2959, "step": 5832 }, { "epoch": 0.7804388546962805, "grad_norm": 23.548789978027344, "learning_rate": 2.1956114530371956e-07, "loss": 0.1728, "step": 5833 }, { "epoch": 0.7805726518597805, "grad_norm": 51.996829986572266, "learning_rate": 2.194273481402194e-07, "loss": 0.4275, "step": 5834 }, { "epoch": 0.7807064490232807, "grad_norm": 37.71440505981445, "learning_rate": 2.192935509767193e-07, "loss": 0.4043, "step": 5835 }, { "epoch": 0.7808402461867808, "grad_norm": 36.42772674560547, "learning_rate": 2.1915975381321917e-07, "loss": 0.3011, "step": 5836 }, { "epoch": 0.780974043350281, "grad_norm": 36.84269332885742, "learning_rate": 2.1902595664971902e-07, "loss": 0.2934, "step": 5837 }, { "epoch": 0.7811078405137811, "grad_norm": 34.21780014038086, "learning_rate": 2.188921594862189e-07, "loss": 0.3328, "step": 5838 }, { "epoch": 0.7812416376772813, "grad_norm": 58.22308349609375, "learning_rate": 2.1875836232271875e-07, "loss": 0.6945, "step": 5839 }, { "epoch": 0.7813754348407814, "grad_norm": 38.21953201293945, "learning_rate": 2.1862456515921863e-07, "loss": 0.3334, "step": 5840 }, { "epoch": 0.7815092320042815, "grad_norm": 46.64885711669922, "learning_rate": 2.1849076799571846e-07, "loss": 0.4453, "step": 5841 }, { "epoch": 0.7816430291677816, "grad_norm": 24.566448211669922, "learning_rate": 2.1835697083221834e-07, "loss": 0.2977, "step": 5842 }, { "epoch": 0.7817768263312818, "grad_norm": 18.9853572845459, "learning_rate": 2.182231736687182e-07, "loss": 0.2968, "step": 5843 }, { "epoch": 0.7819106234947819, "grad_norm": 45.615577697753906, "learning_rate": 2.1808937650521807e-07, "loss": 0.6429, "step": 5844 }, { "epoch": 0.7820444206582821, "grad_norm": 21.663352966308594, "learning_rate": 2.1795557934171792e-07, "loss": 0.2719, "step": 5845 }, { "epoch": 0.7821782178217822, "grad_norm": 31.667543411254883, "learning_rate": 2.178217821782178e-07, "loss": 0.3995, "step": 5846 }, { "epoch": 0.7823120149852824, "grad_norm": 37.09957504272461, "learning_rate": 2.1768798501471768e-07, "loss": 0.4601, "step": 5847 }, { "epoch": 0.7824458121487824, "grad_norm": 55.6026496887207, "learning_rate": 2.1755418785121754e-07, "loss": 0.4805, "step": 5848 }, { "epoch": 0.7825796093122825, "grad_norm": 23.80359649658203, "learning_rate": 2.1742039068771741e-07, "loss": 0.3453, "step": 5849 }, { "epoch": 0.7827134064757827, "grad_norm": 29.542593002319336, "learning_rate": 2.1728659352421727e-07, "loss": 0.541, "step": 5850 }, { "epoch": 0.7828472036392828, "grad_norm": 33.43647003173828, "learning_rate": 2.1715279636071715e-07, "loss": 0.4398, "step": 5851 }, { "epoch": 0.782981000802783, "grad_norm": 31.027238845825195, "learning_rate": 2.17018999197217e-07, "loss": 0.4187, "step": 5852 }, { "epoch": 0.7831147979662831, "grad_norm": 50.88471221923828, "learning_rate": 2.1688520203371688e-07, "loss": 0.5623, "step": 5853 }, { "epoch": 0.7832485951297833, "grad_norm": 56.56265640258789, "learning_rate": 2.1675140487021673e-07, "loss": 0.3833, "step": 5854 }, { "epoch": 0.7833823922932834, "grad_norm": 30.169832229614258, "learning_rate": 2.166176077067166e-07, "loss": 0.4438, "step": 5855 }, { "epoch": 0.7835161894567835, "grad_norm": 43.66333770751953, "learning_rate": 2.1648381054321646e-07, "loss": 0.6004, "step": 5856 }, { "epoch": 0.7836499866202836, "grad_norm": 35.602561950683594, "learning_rate": 2.1635001337971634e-07, "loss": 0.5251, "step": 5857 }, { "epoch": 0.7837837837837838, "grad_norm": 27.05451774597168, "learning_rate": 2.1621621621621622e-07, "loss": 0.4127, "step": 5858 }, { "epoch": 0.7839175809472839, "grad_norm": 35.18065643310547, "learning_rate": 2.1608241905271607e-07, "loss": 0.2272, "step": 5859 }, { "epoch": 0.7840513781107841, "grad_norm": 30.785348892211914, "learning_rate": 2.1594862188921595e-07, "loss": 0.5279, "step": 5860 }, { "epoch": 0.7841851752742842, "grad_norm": 46.5384407043457, "learning_rate": 2.158148247257158e-07, "loss": 0.3105, "step": 5861 }, { "epoch": 0.7843189724377844, "grad_norm": 29.772212982177734, "learning_rate": 2.1568102756221569e-07, "loss": 0.3579, "step": 5862 }, { "epoch": 0.7844527696012844, "grad_norm": 59.71851348876953, "learning_rate": 2.1554723039871554e-07, "loss": 0.3613, "step": 5863 }, { "epoch": 0.7845865667647846, "grad_norm": 31.66663360595703, "learning_rate": 2.1541343323521542e-07, "loss": 0.3646, "step": 5864 }, { "epoch": 0.7847203639282847, "grad_norm": 29.477977752685547, "learning_rate": 2.1527963607171527e-07, "loss": 0.2507, "step": 5865 }, { "epoch": 0.7848541610917849, "grad_norm": 29.465614318847656, "learning_rate": 2.1514583890821515e-07, "loss": 0.332, "step": 5866 }, { "epoch": 0.784987958255285, "grad_norm": 34.950313568115234, "learning_rate": 2.1501204174471498e-07, "loss": 0.3912, "step": 5867 }, { "epoch": 0.7851217554187851, "grad_norm": 43.809165954589844, "learning_rate": 2.1487824458121486e-07, "loss": 0.5607, "step": 5868 }, { "epoch": 0.7852555525822853, "grad_norm": 44.42741012573242, "learning_rate": 2.1474444741771474e-07, "loss": 0.7081, "step": 5869 }, { "epoch": 0.7853893497457853, "grad_norm": 51.03029251098633, "learning_rate": 2.146106502542146e-07, "loss": 0.5563, "step": 5870 }, { "epoch": 0.7855231469092855, "grad_norm": 36.326454162597656, "learning_rate": 2.1447685309071447e-07, "loss": 0.4102, "step": 5871 }, { "epoch": 0.7856569440727856, "grad_norm": 42.98566436767578, "learning_rate": 2.1434305592721432e-07, "loss": 0.4248, "step": 5872 }, { "epoch": 0.7857907412362858, "grad_norm": 34.430816650390625, "learning_rate": 2.142092587637142e-07, "loss": 0.332, "step": 5873 }, { "epoch": 0.7859245383997859, "grad_norm": 60.57834243774414, "learning_rate": 2.1407546160021405e-07, "loss": 0.6026, "step": 5874 }, { "epoch": 0.7860583355632861, "grad_norm": 34.7652587890625, "learning_rate": 2.1394166443671393e-07, "loss": 0.2645, "step": 5875 }, { "epoch": 0.7861921327267862, "grad_norm": 44.22993469238281, "learning_rate": 2.1380786727321378e-07, "loss": 0.6292, "step": 5876 }, { "epoch": 0.7863259298902864, "grad_norm": 38.08737564086914, "learning_rate": 2.1367407010971366e-07, "loss": 0.6221, "step": 5877 }, { "epoch": 0.7864597270537864, "grad_norm": 35.30429458618164, "learning_rate": 2.1354027294621352e-07, "loss": 0.5171, "step": 5878 }, { "epoch": 0.7865935242172866, "grad_norm": 24.817825317382812, "learning_rate": 2.134064757827134e-07, "loss": 0.3601, "step": 5879 }, { "epoch": 0.7867273213807867, "grad_norm": 45.18745422363281, "learning_rate": 2.1327267861921327e-07, "loss": 0.3512, "step": 5880 }, { "epoch": 0.7868611185442869, "grad_norm": 27.709566116333008, "learning_rate": 2.1313888145571313e-07, "loss": 0.2448, "step": 5881 }, { "epoch": 0.786994915707787, "grad_norm": 29.272602081298828, "learning_rate": 2.13005084292213e-07, "loss": 0.3993, "step": 5882 }, { "epoch": 0.7871287128712872, "grad_norm": 39.29494094848633, "learning_rate": 2.1287128712871286e-07, "loss": 0.3592, "step": 5883 }, { "epoch": 0.7872625100347873, "grad_norm": 34.108428955078125, "learning_rate": 2.1273748996521274e-07, "loss": 0.323, "step": 5884 }, { "epoch": 0.7873963071982873, "grad_norm": 30.160175323486328, "learning_rate": 2.126036928017126e-07, "loss": 0.3717, "step": 5885 }, { "epoch": 0.7875301043617875, "grad_norm": 32.6800651550293, "learning_rate": 2.1246989563821247e-07, "loss": 0.3713, "step": 5886 }, { "epoch": 0.7876639015252876, "grad_norm": 24.88990592956543, "learning_rate": 2.1233609847471232e-07, "loss": 0.2949, "step": 5887 }, { "epoch": 0.7877976986887878, "grad_norm": 26.824893951416016, "learning_rate": 2.122023013112122e-07, "loss": 0.4339, "step": 5888 }, { "epoch": 0.7879314958522879, "grad_norm": 32.157752990722656, "learning_rate": 2.1206850414771206e-07, "loss": 0.342, "step": 5889 }, { "epoch": 0.7880652930157881, "grad_norm": 38.62179183959961, "learning_rate": 2.1193470698421194e-07, "loss": 0.3345, "step": 5890 }, { "epoch": 0.7881990901792882, "grad_norm": 46.59391784667969, "learning_rate": 2.1180090982071181e-07, "loss": 0.2414, "step": 5891 }, { "epoch": 0.7883328873427883, "grad_norm": 22.908235549926758, "learning_rate": 2.1166711265721167e-07, "loss": 0.3504, "step": 5892 }, { "epoch": 0.7884666845062884, "grad_norm": 20.86458396911621, "learning_rate": 2.1153331549371155e-07, "loss": 0.1839, "step": 5893 }, { "epoch": 0.7886004816697886, "grad_norm": 29.551433563232422, "learning_rate": 2.1139951833021137e-07, "loss": 0.3315, "step": 5894 }, { "epoch": 0.7887342788332887, "grad_norm": 26.52204704284668, "learning_rate": 2.1126572116671125e-07, "loss": 0.2564, "step": 5895 }, { "epoch": 0.7888680759967889, "grad_norm": 34.1965217590332, "learning_rate": 2.111319240032111e-07, "loss": 0.1477, "step": 5896 }, { "epoch": 0.789001873160289, "grad_norm": 47.675079345703125, "learning_rate": 2.1099812683971098e-07, "loss": 0.3712, "step": 5897 }, { "epoch": 0.7891356703237892, "grad_norm": 21.492666244506836, "learning_rate": 2.1086432967621084e-07, "loss": 0.2508, "step": 5898 }, { "epoch": 0.7892694674872893, "grad_norm": 45.08857727050781, "learning_rate": 2.1073053251271072e-07, "loss": 0.4846, "step": 5899 }, { "epoch": 0.7894032646507894, "grad_norm": 26.4798583984375, "learning_rate": 2.1059673534921057e-07, "loss": 0.2848, "step": 5900 }, { "epoch": 0.7895370618142895, "grad_norm": 42.07830047607422, "learning_rate": 2.1046293818571045e-07, "loss": 0.3654, "step": 5901 }, { "epoch": 0.7896708589777897, "grad_norm": 51.91251754760742, "learning_rate": 2.1032914102221033e-07, "loss": 0.4867, "step": 5902 }, { "epoch": 0.7898046561412898, "grad_norm": 41.197654724121094, "learning_rate": 2.1019534385871018e-07, "loss": 0.4418, "step": 5903 }, { "epoch": 0.78993845330479, "grad_norm": 27.402851104736328, "learning_rate": 2.1006154669521006e-07, "loss": 0.4359, "step": 5904 }, { "epoch": 0.7900722504682901, "grad_norm": 29.997507095336914, "learning_rate": 2.099277495317099e-07, "loss": 0.3289, "step": 5905 }, { "epoch": 0.7902060476317903, "grad_norm": 47.437583923339844, "learning_rate": 2.097939523682098e-07, "loss": 0.5483, "step": 5906 }, { "epoch": 0.7903398447952903, "grad_norm": 38.48162841796875, "learning_rate": 2.0966015520470964e-07, "loss": 0.4591, "step": 5907 }, { "epoch": 0.7904736419587904, "grad_norm": 30.24778175354004, "learning_rate": 2.0952635804120952e-07, "loss": 0.3439, "step": 5908 }, { "epoch": 0.7906074391222906, "grad_norm": 35.27273941040039, "learning_rate": 2.0939256087770938e-07, "loss": 0.354, "step": 5909 }, { "epoch": 0.7907412362857907, "grad_norm": 46.58556365966797, "learning_rate": 2.0925876371420926e-07, "loss": 0.4671, "step": 5910 }, { "epoch": 0.7908750334492909, "grad_norm": 36.33652114868164, "learning_rate": 2.091249665507091e-07, "loss": 0.3357, "step": 5911 }, { "epoch": 0.791008830612791, "grad_norm": 21.778371810913086, "learning_rate": 2.08991169387209e-07, "loss": 0.2588, "step": 5912 }, { "epoch": 0.7911426277762912, "grad_norm": 24.99688148498535, "learning_rate": 2.0885737222370887e-07, "loss": 0.2019, "step": 5913 }, { "epoch": 0.7912764249397912, "grad_norm": 26.119741439819336, "learning_rate": 2.0872357506020872e-07, "loss": 0.2843, "step": 5914 }, { "epoch": 0.7914102221032914, "grad_norm": 37.29553985595703, "learning_rate": 2.085897778967086e-07, "loss": 0.3253, "step": 5915 }, { "epoch": 0.7915440192667915, "grad_norm": 39.22493362426758, "learning_rate": 2.0845598073320845e-07, "loss": 0.3528, "step": 5916 }, { "epoch": 0.7916778164302917, "grad_norm": 49.861106872558594, "learning_rate": 2.0832218356970833e-07, "loss": 0.4472, "step": 5917 }, { "epoch": 0.7918116135937918, "grad_norm": 32.6756477355957, "learning_rate": 2.0818838640620818e-07, "loss": 0.3014, "step": 5918 }, { "epoch": 0.791945410757292, "grad_norm": 30.79366111755371, "learning_rate": 2.0805458924270806e-07, "loss": 0.4515, "step": 5919 }, { "epoch": 0.7920792079207921, "grad_norm": 23.66529655456543, "learning_rate": 2.079207920792079e-07, "loss": 0.2769, "step": 5920 }, { "epoch": 0.7922130050842923, "grad_norm": 43.34184646606445, "learning_rate": 2.0778699491570777e-07, "loss": 0.4603, "step": 5921 }, { "epoch": 0.7923468022477923, "grad_norm": 60.51322555541992, "learning_rate": 2.0765319775220762e-07, "loss": 0.5483, "step": 5922 }, { "epoch": 0.7924805994112925, "grad_norm": 30.651426315307617, "learning_rate": 2.075194005887075e-07, "loss": 0.2796, "step": 5923 }, { "epoch": 0.7926143965747926, "grad_norm": 29.011627197265625, "learning_rate": 2.0738560342520738e-07, "loss": 0.2944, "step": 5924 }, { "epoch": 0.7927481937382928, "grad_norm": 35.88657760620117, "learning_rate": 2.0725180626170723e-07, "loss": 0.3351, "step": 5925 }, { "epoch": 0.7928819909017929, "grad_norm": 52.622798919677734, "learning_rate": 2.071180090982071e-07, "loss": 0.5466, "step": 5926 }, { "epoch": 0.793015788065293, "grad_norm": 35.16899871826172, "learning_rate": 2.0698421193470697e-07, "loss": 0.3645, "step": 5927 }, { "epoch": 0.7931495852287932, "grad_norm": 25.913122177124023, "learning_rate": 2.0685041477120684e-07, "loss": 0.3742, "step": 5928 }, { "epoch": 0.7932833823922932, "grad_norm": 25.508121490478516, "learning_rate": 2.067166176077067e-07, "loss": 0.2914, "step": 5929 }, { "epoch": 0.7934171795557934, "grad_norm": 41.13501739501953, "learning_rate": 2.0658282044420658e-07, "loss": 0.554, "step": 5930 }, { "epoch": 0.7935509767192935, "grad_norm": 37.53587341308594, "learning_rate": 2.0644902328070643e-07, "loss": 0.2711, "step": 5931 }, { "epoch": 0.7936847738827937, "grad_norm": 36.11640167236328, "learning_rate": 2.063152261172063e-07, "loss": 0.3035, "step": 5932 }, { "epoch": 0.7938185710462938, "grad_norm": 26.126493453979492, "learning_rate": 2.0618142895370616e-07, "loss": 0.2116, "step": 5933 }, { "epoch": 0.793952368209794, "grad_norm": 49.34888458251953, "learning_rate": 2.0604763179020604e-07, "loss": 0.4172, "step": 5934 }, { "epoch": 0.7940861653732941, "grad_norm": 56.88228225708008, "learning_rate": 2.0591383462670592e-07, "loss": 0.4302, "step": 5935 }, { "epoch": 0.7942199625367942, "grad_norm": 43.97092819213867, "learning_rate": 2.0578003746320577e-07, "loss": 0.4256, "step": 5936 }, { "epoch": 0.7943537597002943, "grad_norm": 24.811992645263672, "learning_rate": 2.0564624029970565e-07, "loss": 0.3366, "step": 5937 }, { "epoch": 0.7944875568637945, "grad_norm": 46.52334213256836, "learning_rate": 2.055124431362055e-07, "loss": 0.3811, "step": 5938 }, { "epoch": 0.7946213540272946, "grad_norm": 48.787864685058594, "learning_rate": 2.0537864597270538e-07, "loss": 0.4139, "step": 5939 }, { "epoch": 0.7947551511907948, "grad_norm": 42.98524856567383, "learning_rate": 2.0524484880920524e-07, "loss": 0.5175, "step": 5940 }, { "epoch": 0.7948889483542949, "grad_norm": 24.671876907348633, "learning_rate": 2.0511105164570512e-07, "loss": 0.2294, "step": 5941 }, { "epoch": 0.7950227455177951, "grad_norm": 34.182247161865234, "learning_rate": 2.0497725448220497e-07, "loss": 0.3098, "step": 5942 }, { "epoch": 0.7951565426812952, "grad_norm": 29.572206497192383, "learning_rate": 2.0484345731870485e-07, "loss": 0.3015, "step": 5943 }, { "epoch": 0.7952903398447952, "grad_norm": 20.404531478881836, "learning_rate": 2.047096601552047e-07, "loss": 0.119, "step": 5944 }, { "epoch": 0.7954241370082954, "grad_norm": 37.6457633972168, "learning_rate": 2.0457586299170458e-07, "loss": 0.4253, "step": 5945 }, { "epoch": 0.7955579341717955, "grad_norm": 43.493255615234375, "learning_rate": 2.0444206582820443e-07, "loss": 0.4134, "step": 5946 }, { "epoch": 0.7956917313352957, "grad_norm": 26.471412658691406, "learning_rate": 2.0430826866470429e-07, "loss": 0.3096, "step": 5947 }, { "epoch": 0.7958255284987958, "grad_norm": 53.973140716552734, "learning_rate": 2.0417447150120417e-07, "loss": 0.3391, "step": 5948 }, { "epoch": 0.795959325662296, "grad_norm": 16.172313690185547, "learning_rate": 2.0404067433770402e-07, "loss": 0.1884, "step": 5949 }, { "epoch": 0.7960931228257961, "grad_norm": 32.61302185058594, "learning_rate": 2.039068771742039e-07, "loss": 0.3686, "step": 5950 }, { "epoch": 0.7962269199892962, "grad_norm": 64.08366394042969, "learning_rate": 2.0377308001070375e-07, "loss": 0.4524, "step": 5951 }, { "epoch": 0.7963607171527963, "grad_norm": 41.054813385009766, "learning_rate": 2.0363928284720363e-07, "loss": 0.2364, "step": 5952 }, { "epoch": 0.7964945143162965, "grad_norm": 29.503280639648438, "learning_rate": 2.0350548568370348e-07, "loss": 0.3053, "step": 5953 }, { "epoch": 0.7966283114797966, "grad_norm": 38.12965774536133, "learning_rate": 2.0337168852020336e-07, "loss": 0.2374, "step": 5954 }, { "epoch": 0.7967621086432968, "grad_norm": 45.98668670654297, "learning_rate": 2.0323789135670321e-07, "loss": 0.542, "step": 5955 }, { "epoch": 0.7968959058067969, "grad_norm": 36.93436813354492, "learning_rate": 2.031040941932031e-07, "loss": 0.2401, "step": 5956 }, { "epoch": 0.7970297029702971, "grad_norm": 36.46733474731445, "learning_rate": 2.0297029702970297e-07, "loss": 0.4476, "step": 5957 }, { "epoch": 0.7971635001337971, "grad_norm": 52.85204315185547, "learning_rate": 2.0283649986620283e-07, "loss": 0.5081, "step": 5958 }, { "epoch": 0.7972972972972973, "grad_norm": 56.49555206298828, "learning_rate": 2.027027027027027e-07, "loss": 0.3861, "step": 5959 }, { "epoch": 0.7974310944607974, "grad_norm": 29.906597137451172, "learning_rate": 2.0256890553920256e-07, "loss": 0.3447, "step": 5960 }, { "epoch": 0.7975648916242976, "grad_norm": 27.619447708129883, "learning_rate": 2.0243510837570244e-07, "loss": 0.1655, "step": 5961 }, { "epoch": 0.7976986887877977, "grad_norm": 23.56385612487793, "learning_rate": 2.023013112122023e-07, "loss": 0.1778, "step": 5962 }, { "epoch": 0.7978324859512979, "grad_norm": 43.6572380065918, "learning_rate": 2.0216751404870217e-07, "loss": 0.3765, "step": 5963 }, { "epoch": 0.797966283114798, "grad_norm": 24.922653198242188, "learning_rate": 2.0203371688520202e-07, "loss": 0.1858, "step": 5964 }, { "epoch": 0.7981000802782982, "grad_norm": 39.48143005371094, "learning_rate": 2.018999197217019e-07, "loss": 0.3478, "step": 5965 }, { "epoch": 0.7982338774417982, "grad_norm": 57.19672393798828, "learning_rate": 2.0176612255820175e-07, "loss": 0.4153, "step": 5966 }, { "epoch": 0.7983676746052983, "grad_norm": 48.702640533447266, "learning_rate": 2.0163232539470163e-07, "loss": 0.4851, "step": 5967 }, { "epoch": 0.7985014717687985, "grad_norm": 42.3027229309082, "learning_rate": 2.0149852823120149e-07, "loss": 0.4798, "step": 5968 }, { "epoch": 0.7986352689322986, "grad_norm": 29.591148376464844, "learning_rate": 2.0136473106770137e-07, "loss": 0.2099, "step": 5969 }, { "epoch": 0.7987690660957988, "grad_norm": 38.74102020263672, "learning_rate": 2.0123093390420124e-07, "loss": 0.2869, "step": 5970 }, { "epoch": 0.7989028632592989, "grad_norm": 57.30470275878906, "learning_rate": 2.010971367407011e-07, "loss": 0.4412, "step": 5971 }, { "epoch": 0.7990366604227991, "grad_norm": 23.7337646484375, "learning_rate": 2.0096333957720095e-07, "loss": 0.2863, "step": 5972 }, { "epoch": 0.7991704575862991, "grad_norm": 38.32430648803711, "learning_rate": 2.008295424137008e-07, "loss": 0.3896, "step": 5973 }, { "epoch": 0.7993042547497993, "grad_norm": 47.48217010498047, "learning_rate": 2.0069574525020068e-07, "loss": 0.494, "step": 5974 }, { "epoch": 0.7994380519132994, "grad_norm": 61.6131706237793, "learning_rate": 2.0056194808670054e-07, "loss": 0.1802, "step": 5975 }, { "epoch": 0.7995718490767996, "grad_norm": 37.515235900878906, "learning_rate": 2.0042815092320041e-07, "loss": 0.3387, "step": 5976 }, { "epoch": 0.7997056462402997, "grad_norm": 42.388206481933594, "learning_rate": 2.0029435375970027e-07, "loss": 0.3783, "step": 5977 }, { "epoch": 0.7998394434037999, "grad_norm": 29.418989181518555, "learning_rate": 2.0016055659620015e-07, "loss": 0.2427, "step": 5978 }, { "epoch": 0.7999732405673, "grad_norm": 36.157466888427734, "learning_rate": 2.000267594327e-07, "loss": 0.3502, "step": 5979 }, { "epoch": 0.8001070377308, "grad_norm": 29.602304458618164, "learning_rate": 1.9989296226919988e-07, "loss": 0.2727, "step": 5980 }, { "epoch": 0.8002408348943002, "grad_norm": 60.2110710144043, "learning_rate": 1.9975916510569976e-07, "loss": 0.6045, "step": 5981 }, { "epoch": 0.8003746320578004, "grad_norm": 46.67478942871094, "learning_rate": 1.996253679421996e-07, "loss": 0.4099, "step": 5982 }, { "epoch": 0.8005084292213005, "grad_norm": 80.79196166992188, "learning_rate": 1.994915707786995e-07, "loss": 0.7593, "step": 5983 }, { "epoch": 0.8006422263848006, "grad_norm": 28.446792602539062, "learning_rate": 1.9935777361519934e-07, "loss": 0.3208, "step": 5984 }, { "epoch": 0.8007760235483008, "grad_norm": 44.20688247680664, "learning_rate": 1.9922397645169922e-07, "loss": 0.3228, "step": 5985 }, { "epoch": 0.800909820711801, "grad_norm": 31.105731964111328, "learning_rate": 1.9909017928819907e-07, "loss": 0.2447, "step": 5986 }, { "epoch": 0.8010436178753011, "grad_norm": 35.699344635009766, "learning_rate": 1.9895638212469895e-07, "loss": 0.279, "step": 5987 }, { "epoch": 0.8011774150388011, "grad_norm": 46.1633415222168, "learning_rate": 1.988225849611988e-07, "loss": 0.364, "step": 5988 }, { "epoch": 0.8013112122023013, "grad_norm": 40.196388244628906, "learning_rate": 1.9868878779769869e-07, "loss": 0.4187, "step": 5989 }, { "epoch": 0.8014450093658014, "grad_norm": 22.387527465820312, "learning_rate": 1.9855499063419854e-07, "loss": 0.1659, "step": 5990 }, { "epoch": 0.8015788065293016, "grad_norm": 34.354061126708984, "learning_rate": 1.9842119347069842e-07, "loss": 0.3737, "step": 5991 }, { "epoch": 0.8017126036928017, "grad_norm": 35.04619598388672, "learning_rate": 1.982873963071983e-07, "loss": 0.1852, "step": 5992 }, { "epoch": 0.8018464008563019, "grad_norm": 35.16862106323242, "learning_rate": 1.9815359914369815e-07, "loss": 0.3952, "step": 5993 }, { "epoch": 0.801980198019802, "grad_norm": 47.70779800415039, "learning_rate": 1.9801980198019803e-07, "loss": 0.4992, "step": 5994 }, { "epoch": 0.8021139951833021, "grad_norm": 19.993356704711914, "learning_rate": 1.9788600481669788e-07, "loss": 0.1952, "step": 5995 }, { "epoch": 0.8022477923468022, "grad_norm": 52.79712677001953, "learning_rate": 1.9775220765319776e-07, "loss": 0.446, "step": 5996 }, { "epoch": 0.8023815895103024, "grad_norm": 42.22655487060547, "learning_rate": 1.9761841048969761e-07, "loss": 0.3832, "step": 5997 }, { "epoch": 0.8025153866738025, "grad_norm": 33.291893005371094, "learning_rate": 1.9748461332619747e-07, "loss": 0.2865, "step": 5998 }, { "epoch": 0.8026491838373027, "grad_norm": 52.62428665161133, "learning_rate": 1.9735081616269732e-07, "loss": 0.5191, "step": 5999 }, { "epoch": 0.8027829810008028, "grad_norm": 33.80821990966797, "learning_rate": 1.972170189991972e-07, "loss": 0.3687, "step": 6000 }, { "epoch": 0.802916778164303, "grad_norm": 44.613006591796875, "learning_rate": 1.9708322183569705e-07, "loss": 0.3895, "step": 6001 }, { "epoch": 0.803050575327803, "grad_norm": 41.583866119384766, "learning_rate": 1.9694942467219693e-07, "loss": 0.4985, "step": 6002 }, { "epoch": 0.8031843724913031, "grad_norm": 30.404272079467773, "learning_rate": 1.968156275086968e-07, "loss": 0.367, "step": 6003 }, { "epoch": 0.8033181696548033, "grad_norm": 40.19762420654297, "learning_rate": 1.9668183034519666e-07, "loss": 0.239, "step": 6004 }, { "epoch": 0.8034519668183034, "grad_norm": 18.569580078125, "learning_rate": 1.9654803318169654e-07, "loss": 0.1218, "step": 6005 }, { "epoch": 0.8035857639818036, "grad_norm": 57.95931625366211, "learning_rate": 1.964142360181964e-07, "loss": 0.372, "step": 6006 }, { "epoch": 0.8037195611453037, "grad_norm": 27.29948616027832, "learning_rate": 1.9628043885469627e-07, "loss": 0.2739, "step": 6007 }, { "epoch": 0.8038533583088039, "grad_norm": 55.47807693481445, "learning_rate": 1.9614664169119613e-07, "loss": 0.3727, "step": 6008 }, { "epoch": 0.803987155472304, "grad_norm": 31.840736389160156, "learning_rate": 1.96012844527696e-07, "loss": 0.4838, "step": 6009 }, { "epoch": 0.8041209526358041, "grad_norm": 37.76003646850586, "learning_rate": 1.9587904736419586e-07, "loss": 0.3467, "step": 6010 }, { "epoch": 0.8042547497993042, "grad_norm": 42.60310745239258, "learning_rate": 1.9574525020069574e-07, "loss": 0.515, "step": 6011 }, { "epoch": 0.8043885469628044, "grad_norm": 45.114803314208984, "learning_rate": 1.956114530371956e-07, "loss": 0.5221, "step": 6012 }, { "epoch": 0.8045223441263045, "grad_norm": 47.60190963745117, "learning_rate": 1.9547765587369547e-07, "loss": 0.2441, "step": 6013 }, { "epoch": 0.8046561412898047, "grad_norm": 46.48636245727539, "learning_rate": 1.9534385871019535e-07, "loss": 0.5803, "step": 6014 }, { "epoch": 0.8047899384533048, "grad_norm": 48.6907958984375, "learning_rate": 1.952100615466952e-07, "loss": 0.5287, "step": 6015 }, { "epoch": 0.804923735616805, "grad_norm": 37.71184539794922, "learning_rate": 1.9507626438319508e-07, "loss": 0.3278, "step": 6016 }, { "epoch": 0.805057532780305, "grad_norm": 46.0338020324707, "learning_rate": 1.9494246721969494e-07, "loss": 0.5804, "step": 6017 }, { "epoch": 0.8051913299438052, "grad_norm": 31.599679946899414, "learning_rate": 1.9480867005619481e-07, "loss": 0.3212, "step": 6018 }, { "epoch": 0.8053251271073053, "grad_norm": 41.72129821777344, "learning_rate": 1.9467487289269467e-07, "loss": 0.3356, "step": 6019 }, { "epoch": 0.8054589242708055, "grad_norm": 39.50738525390625, "learning_rate": 1.9454107572919455e-07, "loss": 0.2291, "step": 6020 }, { "epoch": 0.8055927214343056, "grad_norm": 32.612998962402344, "learning_rate": 1.944072785656944e-07, "loss": 0.4044, "step": 6021 }, { "epoch": 0.8057265185978058, "grad_norm": 47.96084213256836, "learning_rate": 1.9427348140219428e-07, "loss": 0.564, "step": 6022 }, { "epoch": 0.8058603157613059, "grad_norm": 23.189218521118164, "learning_rate": 1.9413968423869413e-07, "loss": 0.1677, "step": 6023 }, { "epoch": 0.8059941129248059, "grad_norm": 34.0814323425293, "learning_rate": 1.9400588707519398e-07, "loss": 0.3619, "step": 6024 }, { "epoch": 0.8061279100883061, "grad_norm": 47.74324417114258, "learning_rate": 1.9387208991169386e-07, "loss": 0.3667, "step": 6025 }, { "epoch": 0.8062617072518062, "grad_norm": 22.104637145996094, "learning_rate": 1.9373829274819372e-07, "loss": 0.1157, "step": 6026 }, { "epoch": 0.8063955044153064, "grad_norm": 48.27088928222656, "learning_rate": 1.936044955846936e-07, "loss": 0.3772, "step": 6027 }, { "epoch": 0.8065293015788065, "grad_norm": 73.08007049560547, "learning_rate": 1.9347069842119345e-07, "loss": 0.6871, "step": 6028 }, { "epoch": 0.8066630987423067, "grad_norm": 49.54959487915039, "learning_rate": 1.9333690125769333e-07, "loss": 0.4672, "step": 6029 }, { "epoch": 0.8067968959058068, "grad_norm": 41.44097900390625, "learning_rate": 1.9320310409419318e-07, "loss": 0.3906, "step": 6030 }, { "epoch": 0.806930693069307, "grad_norm": 32.2588996887207, "learning_rate": 1.9306930693069306e-07, "loss": 0.4636, "step": 6031 }, { "epoch": 0.807064490232807, "grad_norm": 43.3687858581543, "learning_rate": 1.929355097671929e-07, "loss": 0.3708, "step": 6032 }, { "epoch": 0.8071982873963072, "grad_norm": 25.545082092285156, "learning_rate": 1.928017126036928e-07, "loss": 0.3059, "step": 6033 }, { "epoch": 0.8073320845598073, "grad_norm": 63.63025665283203, "learning_rate": 1.9266791544019264e-07, "loss": 0.6169, "step": 6034 }, { "epoch": 0.8074658817233075, "grad_norm": 35.29244613647461, "learning_rate": 1.9253411827669252e-07, "loss": 0.5458, "step": 6035 }, { "epoch": 0.8075996788868076, "grad_norm": 34.18653106689453, "learning_rate": 1.924003211131924e-07, "loss": 0.2989, "step": 6036 }, { "epoch": 0.8077334760503078, "grad_norm": 44.61872863769531, "learning_rate": 1.9226652394969226e-07, "loss": 0.3707, "step": 6037 }, { "epoch": 0.8078672732138079, "grad_norm": 41.22525405883789, "learning_rate": 1.9213272678619214e-07, "loss": 0.5054, "step": 6038 }, { "epoch": 0.808001070377308, "grad_norm": 27.422609329223633, "learning_rate": 1.91998929622692e-07, "loss": 0.2865, "step": 6039 }, { "epoch": 0.8081348675408081, "grad_norm": 45.10916519165039, "learning_rate": 1.9186513245919187e-07, "loss": 0.7219, "step": 6040 }, { "epoch": 0.8082686647043082, "grad_norm": 53.546932220458984, "learning_rate": 1.9173133529569172e-07, "loss": 0.4894, "step": 6041 }, { "epoch": 0.8084024618678084, "grad_norm": 38.41632843017578, "learning_rate": 1.915975381321916e-07, "loss": 0.3896, "step": 6042 }, { "epoch": 0.8085362590313085, "grad_norm": 34.47414779663086, "learning_rate": 1.9146374096869145e-07, "loss": 0.3158, "step": 6043 }, { "epoch": 0.8086700561948087, "grad_norm": 26.49869728088379, "learning_rate": 1.9132994380519133e-07, "loss": 0.2003, "step": 6044 }, { "epoch": 0.8088038533583088, "grad_norm": 36.458656311035156, "learning_rate": 1.9119614664169118e-07, "loss": 0.248, "step": 6045 }, { "epoch": 0.808937650521809, "grad_norm": 35.55910873413086, "learning_rate": 1.9106234947819106e-07, "loss": 0.3154, "step": 6046 }, { "epoch": 0.809071447685309, "grad_norm": 56.625404357910156, "learning_rate": 1.9092855231469094e-07, "loss": 0.3309, "step": 6047 }, { "epoch": 0.8092052448488092, "grad_norm": 62.04796600341797, "learning_rate": 1.907947551511908e-07, "loss": 0.5731, "step": 6048 }, { "epoch": 0.8093390420123093, "grad_norm": 37.41118240356445, "learning_rate": 1.9066095798769067e-07, "loss": 0.2642, "step": 6049 }, { "epoch": 0.8094728391758095, "grad_norm": 59.083160400390625, "learning_rate": 1.905271608241905e-07, "loss": 0.6503, "step": 6050 }, { "epoch": 0.8096066363393096, "grad_norm": 58.113807678222656, "learning_rate": 1.9039336366069038e-07, "loss": 0.6138, "step": 6051 }, { "epoch": 0.8097404335028098, "grad_norm": 44.76179504394531, "learning_rate": 1.9025956649719023e-07, "loss": 0.3292, "step": 6052 }, { "epoch": 0.8098742306663099, "grad_norm": 37.34121322631836, "learning_rate": 1.901257693336901e-07, "loss": 0.3286, "step": 6053 }, { "epoch": 0.81000802782981, "grad_norm": 30.07959747314453, "learning_rate": 1.8999197217018997e-07, "loss": 0.3124, "step": 6054 }, { "epoch": 0.8101418249933101, "grad_norm": 67.03507232666016, "learning_rate": 1.8985817500668984e-07, "loss": 0.7048, "step": 6055 }, { "epoch": 0.8102756221568103, "grad_norm": 33.537105560302734, "learning_rate": 1.897243778431897e-07, "loss": 0.401, "step": 6056 }, { "epoch": 0.8104094193203104, "grad_norm": 24.539306640625, "learning_rate": 1.8959058067968958e-07, "loss": 0.2794, "step": 6057 }, { "epoch": 0.8105432164838106, "grad_norm": 45.065792083740234, "learning_rate": 1.8945678351618946e-07, "loss": 0.4933, "step": 6058 }, { "epoch": 0.8106770136473107, "grad_norm": 30.076194763183594, "learning_rate": 1.893229863526893e-07, "loss": 0.232, "step": 6059 }, { "epoch": 0.8108108108108109, "grad_norm": 33.094173431396484, "learning_rate": 1.891891891891892e-07, "loss": 0.3601, "step": 6060 }, { "epoch": 0.8109446079743109, "grad_norm": 32.20258331298828, "learning_rate": 1.8905539202568904e-07, "loss": 0.4674, "step": 6061 }, { "epoch": 0.811078405137811, "grad_norm": 25.255640029907227, "learning_rate": 1.8892159486218892e-07, "loss": 0.2862, "step": 6062 }, { "epoch": 0.8112122023013112, "grad_norm": 66.8658447265625, "learning_rate": 1.8878779769868877e-07, "loss": 0.6324, "step": 6063 }, { "epoch": 0.8113459994648113, "grad_norm": 40.14030838012695, "learning_rate": 1.8865400053518865e-07, "loss": 0.4406, "step": 6064 }, { "epoch": 0.8114797966283115, "grad_norm": 42.24664306640625, "learning_rate": 1.885202033716885e-07, "loss": 0.451, "step": 6065 }, { "epoch": 0.8116135937918116, "grad_norm": 32.81924819946289, "learning_rate": 1.8838640620818838e-07, "loss": 0.3373, "step": 6066 }, { "epoch": 0.8117473909553118, "grad_norm": 35.355712890625, "learning_rate": 1.8825260904468824e-07, "loss": 0.2299, "step": 6067 }, { "epoch": 0.8118811881188119, "grad_norm": 45.298240661621094, "learning_rate": 1.8811881188118812e-07, "loss": 0.3342, "step": 6068 }, { "epoch": 0.812014985282312, "grad_norm": 46.23775863647461, "learning_rate": 1.87985014717688e-07, "loss": 0.4274, "step": 6069 }, { "epoch": 0.8121487824458121, "grad_norm": 34.051456451416016, "learning_rate": 1.8785121755418785e-07, "loss": 0.3955, "step": 6070 }, { "epoch": 0.8122825796093123, "grad_norm": 28.59510612487793, "learning_rate": 1.8771742039068773e-07, "loss": 0.3949, "step": 6071 }, { "epoch": 0.8124163767728124, "grad_norm": 21.58180809020996, "learning_rate": 1.8758362322718758e-07, "loss": 0.2789, "step": 6072 }, { "epoch": 0.8125501739363126, "grad_norm": 28.96858787536621, "learning_rate": 1.8744982606368746e-07, "loss": 0.3708, "step": 6073 }, { "epoch": 0.8126839710998127, "grad_norm": 33.05192947387695, "learning_rate": 1.873160289001873e-07, "loss": 0.3903, "step": 6074 }, { "epoch": 0.8128177682633129, "grad_norm": 39.35911178588867, "learning_rate": 1.871822317366872e-07, "loss": 0.382, "step": 6075 }, { "epoch": 0.8129515654268129, "grad_norm": 40.62821578979492, "learning_rate": 1.8704843457318702e-07, "loss": 0.4403, "step": 6076 }, { "epoch": 0.813085362590313, "grad_norm": 38.52106857299805, "learning_rate": 1.869146374096869e-07, "loss": 0.645, "step": 6077 }, { "epoch": 0.8132191597538132, "grad_norm": 37.23530578613281, "learning_rate": 1.8678084024618675e-07, "loss": 0.4747, "step": 6078 }, { "epoch": 0.8133529569173134, "grad_norm": 33.900569915771484, "learning_rate": 1.8664704308268663e-07, "loss": 0.241, "step": 6079 }, { "epoch": 0.8134867540808135, "grad_norm": 31.73480987548828, "learning_rate": 1.865132459191865e-07, "loss": 0.4692, "step": 6080 }, { "epoch": 0.8136205512443137, "grad_norm": 42.30733108520508, "learning_rate": 1.8637944875568636e-07, "loss": 0.4102, "step": 6081 }, { "epoch": 0.8137543484078138, "grad_norm": 32.27979278564453, "learning_rate": 1.8624565159218624e-07, "loss": 0.3275, "step": 6082 }, { "epoch": 0.8138881455713138, "grad_norm": 34.96104431152344, "learning_rate": 1.861118544286861e-07, "loss": 0.2704, "step": 6083 }, { "epoch": 0.814021942734814, "grad_norm": 47.12677001953125, "learning_rate": 1.8597805726518597e-07, "loss": 0.4807, "step": 6084 }, { "epoch": 0.8141557398983141, "grad_norm": 50.42140197753906, "learning_rate": 1.8584426010168583e-07, "loss": 0.5064, "step": 6085 }, { "epoch": 0.8142895370618143, "grad_norm": 42.8564453125, "learning_rate": 1.857104629381857e-07, "loss": 0.4734, "step": 6086 }, { "epoch": 0.8144233342253144, "grad_norm": 62.58317947387695, "learning_rate": 1.8557666577468556e-07, "loss": 0.3821, "step": 6087 }, { "epoch": 0.8145571313888146, "grad_norm": 25.747364044189453, "learning_rate": 1.8544286861118544e-07, "loss": 0.2403, "step": 6088 }, { "epoch": 0.8146909285523147, "grad_norm": 47.242671966552734, "learning_rate": 1.853090714476853e-07, "loss": 0.4132, "step": 6089 }, { "epoch": 0.8148247257158149, "grad_norm": 49.892066955566406, "learning_rate": 1.8517527428418517e-07, "loss": 0.552, "step": 6090 }, { "epoch": 0.8149585228793149, "grad_norm": 15.936903953552246, "learning_rate": 1.8504147712068505e-07, "loss": 0.1696, "step": 6091 }, { "epoch": 0.8150923200428151, "grad_norm": 32.53496170043945, "learning_rate": 1.849076799571849e-07, "loss": 0.3871, "step": 6092 }, { "epoch": 0.8152261172063152, "grad_norm": 53.08107376098633, "learning_rate": 1.8477388279368478e-07, "loss": 0.3892, "step": 6093 }, { "epoch": 0.8153599143698154, "grad_norm": 41.831153869628906, "learning_rate": 1.8464008563018463e-07, "loss": 0.2866, "step": 6094 }, { "epoch": 0.8154937115333155, "grad_norm": 22.844221115112305, "learning_rate": 1.845062884666845e-07, "loss": 0.1915, "step": 6095 }, { "epoch": 0.8156275086968157, "grad_norm": 17.485618591308594, "learning_rate": 1.8437249130318437e-07, "loss": 0.1598, "step": 6096 }, { "epoch": 0.8157613058603158, "grad_norm": 33.887718200683594, "learning_rate": 1.8423869413968424e-07, "loss": 0.3566, "step": 6097 }, { "epoch": 0.8158951030238158, "grad_norm": 41.16426086425781, "learning_rate": 1.841048969761841e-07, "loss": 0.2951, "step": 6098 }, { "epoch": 0.816028900187316, "grad_norm": 28.72146224975586, "learning_rate": 1.8397109981268398e-07, "loss": 0.3252, "step": 6099 }, { "epoch": 0.8161626973508161, "grad_norm": 42.87639236450195, "learning_rate": 1.8383730264918383e-07, "loss": 0.472, "step": 6100 }, { "epoch": 0.8162964945143163, "grad_norm": 48.802799224853516, "learning_rate": 1.837035054856837e-07, "loss": 0.506, "step": 6101 }, { "epoch": 0.8164302916778164, "grad_norm": 28.74397850036621, "learning_rate": 1.835697083221836e-07, "loss": 0.2632, "step": 6102 }, { "epoch": 0.8165640888413166, "grad_norm": 50.28133773803711, "learning_rate": 1.8343591115868341e-07, "loss": 0.5281, "step": 6103 }, { "epoch": 0.8166978860048167, "grad_norm": 24.451642990112305, "learning_rate": 1.833021139951833e-07, "loss": 0.2424, "step": 6104 }, { "epoch": 0.8168316831683168, "grad_norm": 34.295143127441406, "learning_rate": 1.8316831683168315e-07, "loss": 0.4089, "step": 6105 }, { "epoch": 0.8169654803318169, "grad_norm": 38.12445831298828, "learning_rate": 1.8303451966818303e-07, "loss": 0.283, "step": 6106 }, { "epoch": 0.8170992774953171, "grad_norm": 51.79294967651367, "learning_rate": 1.8290072250468288e-07, "loss": 0.3346, "step": 6107 }, { "epoch": 0.8172330746588172, "grad_norm": 33.998435974121094, "learning_rate": 1.8276692534118276e-07, "loss": 0.3796, "step": 6108 }, { "epoch": 0.8173668718223174, "grad_norm": 28.781665802001953, "learning_rate": 1.826331281776826e-07, "loss": 0.294, "step": 6109 }, { "epoch": 0.8175006689858175, "grad_norm": 44.059329986572266, "learning_rate": 1.824993310141825e-07, "loss": 0.301, "step": 6110 }, { "epoch": 0.8176344661493177, "grad_norm": 35.96464157104492, "learning_rate": 1.8236553385068234e-07, "loss": 0.4979, "step": 6111 }, { "epoch": 0.8177682633128178, "grad_norm": 23.254514694213867, "learning_rate": 1.8223173668718222e-07, "loss": 0.2447, "step": 6112 }, { "epoch": 0.8179020604763179, "grad_norm": 21.731159210205078, "learning_rate": 1.8209793952368207e-07, "loss": 0.1477, "step": 6113 }, { "epoch": 0.818035857639818, "grad_norm": 84.14308166503906, "learning_rate": 1.8196414236018195e-07, "loss": 0.8197, "step": 6114 }, { "epoch": 0.8181696548033182, "grad_norm": 24.041913986206055, "learning_rate": 1.8183034519668183e-07, "loss": 0.2033, "step": 6115 }, { "epoch": 0.8183034519668183, "grad_norm": 44.338748931884766, "learning_rate": 1.8169654803318169e-07, "loss": 0.2405, "step": 6116 }, { "epoch": 0.8184372491303185, "grad_norm": 35.3022575378418, "learning_rate": 1.8156275086968157e-07, "loss": 0.5574, "step": 6117 }, { "epoch": 0.8185710462938186, "grad_norm": 72.69999694824219, "learning_rate": 1.8142895370618142e-07, "loss": 0.5781, "step": 6118 }, { "epoch": 0.8187048434573188, "grad_norm": 28.206357955932617, "learning_rate": 1.812951565426813e-07, "loss": 0.194, "step": 6119 }, { "epoch": 0.8188386406208188, "grad_norm": 48.75450134277344, "learning_rate": 1.8116135937918115e-07, "loss": 0.3639, "step": 6120 }, { "epoch": 0.8189724377843189, "grad_norm": 19.582319259643555, "learning_rate": 1.8102756221568103e-07, "loss": 0.2779, "step": 6121 }, { "epoch": 0.8191062349478191, "grad_norm": 36.88709259033203, "learning_rate": 1.8089376505218088e-07, "loss": 0.4088, "step": 6122 }, { "epoch": 0.8192400321113192, "grad_norm": 48.56965255737305, "learning_rate": 1.8075996788868076e-07, "loss": 0.5452, "step": 6123 }, { "epoch": 0.8193738292748194, "grad_norm": 60.62112808227539, "learning_rate": 1.8062617072518061e-07, "loss": 0.5807, "step": 6124 }, { "epoch": 0.8195076264383195, "grad_norm": 24.220096588134766, "learning_rate": 1.804923735616805e-07, "loss": 0.1775, "step": 6125 }, { "epoch": 0.8196414236018197, "grad_norm": 50.71894836425781, "learning_rate": 1.8035857639818037e-07, "loss": 0.4944, "step": 6126 }, { "epoch": 0.8197752207653197, "grad_norm": 28.62883186340332, "learning_rate": 1.8022477923468023e-07, "loss": 0.2682, "step": 6127 }, { "epoch": 0.8199090179288199, "grad_norm": 44.96043395996094, "learning_rate": 1.800909820711801e-07, "loss": 0.2531, "step": 6128 }, { "epoch": 0.82004281509232, "grad_norm": 20.357906341552734, "learning_rate": 1.7995718490767993e-07, "loss": 0.1707, "step": 6129 }, { "epoch": 0.8201766122558202, "grad_norm": 32.46501541137695, "learning_rate": 1.798233877441798e-07, "loss": 0.3759, "step": 6130 }, { "epoch": 0.8203104094193203, "grad_norm": 61.023555755615234, "learning_rate": 1.7968959058067966e-07, "loss": 0.5307, "step": 6131 }, { "epoch": 0.8204442065828205, "grad_norm": 31.55989646911621, "learning_rate": 1.7955579341717954e-07, "loss": 0.2202, "step": 6132 }, { "epoch": 0.8205780037463206, "grad_norm": 36.62981414794922, "learning_rate": 1.794219962536794e-07, "loss": 0.4133, "step": 6133 }, { "epoch": 0.8207118009098208, "grad_norm": 44.205101013183594, "learning_rate": 1.7928819909017927e-07, "loss": 0.4736, "step": 6134 }, { "epoch": 0.8208455980733208, "grad_norm": 28.609485626220703, "learning_rate": 1.7915440192667913e-07, "loss": 0.2778, "step": 6135 }, { "epoch": 0.820979395236821, "grad_norm": 36.12577819824219, "learning_rate": 1.79020604763179e-07, "loss": 0.2668, "step": 6136 }, { "epoch": 0.8211131924003211, "grad_norm": 74.00524139404297, "learning_rate": 1.7888680759967889e-07, "loss": 0.7507, "step": 6137 }, { "epoch": 0.8212469895638213, "grad_norm": 38.8342170715332, "learning_rate": 1.7875301043617874e-07, "loss": 0.1824, "step": 6138 }, { "epoch": 0.8213807867273214, "grad_norm": 40.09911346435547, "learning_rate": 1.7861921327267862e-07, "loss": 0.4112, "step": 6139 }, { "epoch": 0.8215145838908215, "grad_norm": 22.286426544189453, "learning_rate": 1.7848541610917847e-07, "loss": 0.1968, "step": 6140 }, { "epoch": 0.8216483810543217, "grad_norm": 30.969188690185547, "learning_rate": 1.7835161894567835e-07, "loss": 0.3316, "step": 6141 }, { "epoch": 0.8217821782178217, "grad_norm": 53.69057846069336, "learning_rate": 1.782178217821782e-07, "loss": 0.4683, "step": 6142 }, { "epoch": 0.8219159753813219, "grad_norm": 41.990421295166016, "learning_rate": 1.7808402461867808e-07, "loss": 0.5311, "step": 6143 }, { "epoch": 0.822049772544822, "grad_norm": 27.19840431213379, "learning_rate": 1.7795022745517793e-07, "loss": 0.2601, "step": 6144 }, { "epoch": 0.8221835697083222, "grad_norm": 65.52998352050781, "learning_rate": 1.7781643029167781e-07, "loss": 0.4411, "step": 6145 }, { "epoch": 0.8223173668718223, "grad_norm": 33.75215530395508, "learning_rate": 1.7768263312817767e-07, "loss": 0.2625, "step": 6146 }, { "epoch": 0.8224511640353225, "grad_norm": 25.811662673950195, "learning_rate": 1.7754883596467755e-07, "loss": 0.2023, "step": 6147 }, { "epoch": 0.8225849611988226, "grad_norm": 56.60342025756836, "learning_rate": 1.7741503880117743e-07, "loss": 0.3933, "step": 6148 }, { "epoch": 0.8227187583623227, "grad_norm": 31.85065460205078, "learning_rate": 1.7728124163767728e-07, "loss": 0.2818, "step": 6149 }, { "epoch": 0.8228525555258228, "grad_norm": 49.355533599853516, "learning_rate": 1.7714744447417716e-07, "loss": 0.4734, "step": 6150 }, { "epoch": 0.822986352689323, "grad_norm": 30.70903778076172, "learning_rate": 1.77013647310677e-07, "loss": 0.2141, "step": 6151 }, { "epoch": 0.8231201498528231, "grad_norm": 60.25900650024414, "learning_rate": 1.768798501471769e-07, "loss": 0.4957, "step": 6152 }, { "epoch": 0.8232539470163233, "grad_norm": 34.09272384643555, "learning_rate": 1.7674605298367674e-07, "loss": 0.3236, "step": 6153 }, { "epoch": 0.8233877441798234, "grad_norm": 42.87863540649414, "learning_rate": 1.7661225582017662e-07, "loss": 0.1877, "step": 6154 }, { "epoch": 0.8235215413433236, "grad_norm": 48.57933044433594, "learning_rate": 1.7647845865667645e-07, "loss": 0.3946, "step": 6155 }, { "epoch": 0.8236553385068237, "grad_norm": 31.03558921813965, "learning_rate": 1.7634466149317633e-07, "loss": 0.2786, "step": 6156 }, { "epoch": 0.8237891356703237, "grad_norm": 38.52149200439453, "learning_rate": 1.7621086432967618e-07, "loss": 0.3843, "step": 6157 }, { "epoch": 0.8239229328338239, "grad_norm": 28.696670532226562, "learning_rate": 1.7607706716617606e-07, "loss": 0.2644, "step": 6158 }, { "epoch": 0.824056729997324, "grad_norm": 41.2546501159668, "learning_rate": 1.7594327000267594e-07, "loss": 0.4564, "step": 6159 }, { "epoch": 0.8241905271608242, "grad_norm": 47.266483306884766, "learning_rate": 1.758094728391758e-07, "loss": 0.443, "step": 6160 }, { "epoch": 0.8243243243243243, "grad_norm": 39.705223083496094, "learning_rate": 1.7567567567567567e-07, "loss": 0.1658, "step": 6161 }, { "epoch": 0.8244581214878245, "grad_norm": 32.57626724243164, "learning_rate": 1.7554187851217552e-07, "loss": 0.2214, "step": 6162 }, { "epoch": 0.8245919186513246, "grad_norm": 53.30255126953125, "learning_rate": 1.754080813486754e-07, "loss": 0.7185, "step": 6163 }, { "epoch": 0.8247257158148247, "grad_norm": 39.08267593383789, "learning_rate": 1.7527428418517526e-07, "loss": 0.4282, "step": 6164 }, { "epoch": 0.8248595129783248, "grad_norm": 23.349132537841797, "learning_rate": 1.7514048702167513e-07, "loss": 0.1921, "step": 6165 }, { "epoch": 0.824993310141825, "grad_norm": 40.54816436767578, "learning_rate": 1.75006689858175e-07, "loss": 0.2823, "step": 6166 }, { "epoch": 0.8251271073053251, "grad_norm": 62.57420349121094, "learning_rate": 1.7487289269467487e-07, "loss": 0.604, "step": 6167 }, { "epoch": 0.8252609044688253, "grad_norm": 30.469829559326172, "learning_rate": 1.7473909553117472e-07, "loss": 0.181, "step": 6168 }, { "epoch": 0.8253947016323254, "grad_norm": 34.83667755126953, "learning_rate": 1.746052983676746e-07, "loss": 0.3457, "step": 6169 }, { "epoch": 0.8255284987958256, "grad_norm": 53.78257751464844, "learning_rate": 1.7447150120417448e-07, "loss": 0.5774, "step": 6170 }, { "epoch": 0.8256622959593256, "grad_norm": 43.64907455444336, "learning_rate": 1.7433770404067433e-07, "loss": 0.4553, "step": 6171 }, { "epoch": 0.8257960931228258, "grad_norm": 51.54743957519531, "learning_rate": 1.742039068771742e-07, "loss": 0.5286, "step": 6172 }, { "epoch": 0.8259298902863259, "grad_norm": 34.45227813720703, "learning_rate": 1.7407010971367406e-07, "loss": 0.2702, "step": 6173 }, { "epoch": 0.8260636874498261, "grad_norm": 25.838088989257812, "learning_rate": 1.7393631255017394e-07, "loss": 0.2846, "step": 6174 }, { "epoch": 0.8261974846133262, "grad_norm": 45.80366134643555, "learning_rate": 1.738025153866738e-07, "loss": 0.5051, "step": 6175 }, { "epoch": 0.8263312817768264, "grad_norm": 30.278186798095703, "learning_rate": 1.7366871822317367e-07, "loss": 0.415, "step": 6176 }, { "epoch": 0.8264650789403265, "grad_norm": 27.973796844482422, "learning_rate": 1.7353492105967353e-07, "loss": 0.1732, "step": 6177 }, { "epoch": 0.8265988761038267, "grad_norm": 40.62730026245117, "learning_rate": 1.734011238961734e-07, "loss": 0.2659, "step": 6178 }, { "epoch": 0.8267326732673267, "grad_norm": 45.56938171386719, "learning_rate": 1.7326732673267326e-07, "loss": 0.5085, "step": 6179 }, { "epoch": 0.8268664704308268, "grad_norm": 58.94655227661133, "learning_rate": 1.7313352956917314e-07, "loss": 0.5064, "step": 6180 }, { "epoch": 0.827000267594327, "grad_norm": 39.64093780517578, "learning_rate": 1.7299973240567302e-07, "loss": 0.3154, "step": 6181 }, { "epoch": 0.8271340647578271, "grad_norm": 17.605384826660156, "learning_rate": 1.7286593524217284e-07, "loss": 0.1933, "step": 6182 }, { "epoch": 0.8272678619213273, "grad_norm": 54.55253601074219, "learning_rate": 1.7273213807867272e-07, "loss": 0.362, "step": 6183 }, { "epoch": 0.8274016590848274, "grad_norm": 46.921180725097656, "learning_rate": 1.7259834091517258e-07, "loss": 0.433, "step": 6184 }, { "epoch": 0.8275354562483276, "grad_norm": 37.819602966308594, "learning_rate": 1.7246454375167246e-07, "loss": 0.3842, "step": 6185 }, { "epoch": 0.8276692534118276, "grad_norm": 25.78912925720215, "learning_rate": 1.723307465881723e-07, "loss": 0.2582, "step": 6186 }, { "epoch": 0.8278030505753278, "grad_norm": 59.37933349609375, "learning_rate": 1.721969494246722e-07, "loss": 0.4853, "step": 6187 }, { "epoch": 0.8279368477388279, "grad_norm": 42.33549118041992, "learning_rate": 1.7206315226117204e-07, "loss": 0.3654, "step": 6188 }, { "epoch": 0.8280706449023281, "grad_norm": 49.23624801635742, "learning_rate": 1.7192935509767192e-07, "loss": 0.4437, "step": 6189 }, { "epoch": 0.8282044420658282, "grad_norm": 40.62432861328125, "learning_rate": 1.7179555793417177e-07, "loss": 0.3015, "step": 6190 }, { "epoch": 0.8283382392293284, "grad_norm": 24.538196563720703, "learning_rate": 1.7166176077067165e-07, "loss": 0.4941, "step": 6191 }, { "epoch": 0.8284720363928285, "grad_norm": 66.46969604492188, "learning_rate": 1.7152796360717153e-07, "loss": 0.4481, "step": 6192 }, { "epoch": 0.8286058335563286, "grad_norm": 26.800785064697266, "learning_rate": 1.7139416644367138e-07, "loss": 0.3401, "step": 6193 }, { "epoch": 0.8287396307198287, "grad_norm": 25.665817260742188, "learning_rate": 1.7126036928017126e-07, "loss": 0.169, "step": 6194 }, { "epoch": 0.8288734278833289, "grad_norm": 50.04769515991211, "learning_rate": 1.7112657211667112e-07, "loss": 0.152, "step": 6195 }, { "epoch": 0.829007225046829, "grad_norm": 33.93638610839844, "learning_rate": 1.70992774953171e-07, "loss": 0.3754, "step": 6196 }, { "epoch": 0.8291410222103291, "grad_norm": 59.36198806762695, "learning_rate": 1.7085897778967085e-07, "loss": 0.4698, "step": 6197 }, { "epoch": 0.8292748193738293, "grad_norm": 62.23344421386719, "learning_rate": 1.7072518062617073e-07, "loss": 0.5148, "step": 6198 }, { "epoch": 0.8294086165373294, "grad_norm": 44.44648361206055, "learning_rate": 1.7059138346267058e-07, "loss": 0.5419, "step": 6199 }, { "epoch": 0.8295424137008296, "grad_norm": 39.745521545410156, "learning_rate": 1.7045758629917046e-07, "loss": 0.453, "step": 6200 }, { "epoch": 0.8296762108643296, "grad_norm": 66.510498046875, "learning_rate": 1.703237891356703e-07, "loss": 0.3703, "step": 6201 }, { "epoch": 0.8298100080278298, "grad_norm": 45.59667205810547, "learning_rate": 1.701899919721702e-07, "loss": 0.4712, "step": 6202 }, { "epoch": 0.8299438051913299, "grad_norm": 34.194522857666016, "learning_rate": 1.7005619480867007e-07, "loss": 0.364, "step": 6203 }, { "epoch": 0.8300776023548301, "grad_norm": 38.712242126464844, "learning_rate": 1.6992239764516992e-07, "loss": 0.2711, "step": 6204 }, { "epoch": 0.8302113995183302, "grad_norm": 36.77295684814453, "learning_rate": 1.697886004816698e-07, "loss": 0.37, "step": 6205 }, { "epoch": 0.8303451966818304, "grad_norm": 35.999088287353516, "learning_rate": 1.6965480331816966e-07, "loss": 0.3958, "step": 6206 }, { "epoch": 0.8304789938453305, "grad_norm": 31.82061195373535, "learning_rate": 1.695210061546695e-07, "loss": 0.2651, "step": 6207 }, { "epoch": 0.8306127910088306, "grad_norm": 25.078920364379883, "learning_rate": 1.6938720899116936e-07, "loss": 0.1842, "step": 6208 }, { "epoch": 0.8307465881723307, "grad_norm": 24.13239860534668, "learning_rate": 1.6925341182766924e-07, "loss": 0.1901, "step": 6209 }, { "epoch": 0.8308803853358309, "grad_norm": 46.7823486328125, "learning_rate": 1.691196146641691e-07, "loss": 0.3589, "step": 6210 }, { "epoch": 0.831014182499331, "grad_norm": 43.93208312988281, "learning_rate": 1.6898581750066897e-07, "loss": 0.3741, "step": 6211 }, { "epoch": 0.8311479796628312, "grad_norm": 26.55392074584961, "learning_rate": 1.6885202033716883e-07, "loss": 0.2353, "step": 6212 }, { "epoch": 0.8312817768263313, "grad_norm": 43.39262390136719, "learning_rate": 1.687182231736687e-07, "loss": 0.3952, "step": 6213 }, { "epoch": 0.8314155739898315, "grad_norm": 35.68336868286133, "learning_rate": 1.6858442601016858e-07, "loss": 0.3159, "step": 6214 }, { "epoch": 0.8315493711533315, "grad_norm": 36.8607063293457, "learning_rate": 1.6845062884666844e-07, "loss": 0.3562, "step": 6215 }, { "epoch": 0.8316831683168316, "grad_norm": 42.647682189941406, "learning_rate": 1.6831683168316832e-07, "loss": 0.4923, "step": 6216 }, { "epoch": 0.8318169654803318, "grad_norm": 24.47417640686035, "learning_rate": 1.6818303451966817e-07, "loss": 0.2525, "step": 6217 }, { "epoch": 0.8319507626438319, "grad_norm": 19.7694091796875, "learning_rate": 1.6804923735616805e-07, "loss": 0.1996, "step": 6218 }, { "epoch": 0.8320845598073321, "grad_norm": 51.67018508911133, "learning_rate": 1.679154401926679e-07, "loss": 0.2984, "step": 6219 }, { "epoch": 0.8322183569708322, "grad_norm": 40.49369430541992, "learning_rate": 1.6778164302916778e-07, "loss": 0.2703, "step": 6220 }, { "epoch": 0.8323521541343324, "grad_norm": 25.629560470581055, "learning_rate": 1.6764784586566763e-07, "loss": 0.2399, "step": 6221 }, { "epoch": 0.8324859512978325, "grad_norm": 48.38799285888672, "learning_rate": 1.675140487021675e-07, "loss": 0.323, "step": 6222 }, { "epoch": 0.8326197484613326, "grad_norm": 22.3282413482666, "learning_rate": 1.6738025153866736e-07, "loss": 0.1987, "step": 6223 }, { "epoch": 0.8327535456248327, "grad_norm": 41.89963150024414, "learning_rate": 1.6724645437516724e-07, "loss": 0.3506, "step": 6224 }, { "epoch": 0.8328873427883329, "grad_norm": 50.5027961730957, "learning_rate": 1.6711265721166712e-07, "loss": 0.4552, "step": 6225 }, { "epoch": 0.833021139951833, "grad_norm": 38.56074523925781, "learning_rate": 1.6697886004816698e-07, "loss": 0.5424, "step": 6226 }, { "epoch": 0.8331549371153332, "grad_norm": 32.44462585449219, "learning_rate": 1.6684506288466686e-07, "loss": 0.2809, "step": 6227 }, { "epoch": 0.8332887342788333, "grad_norm": 45.015384674072266, "learning_rate": 1.667112657211667e-07, "loss": 0.3817, "step": 6228 }, { "epoch": 0.8334225314423335, "grad_norm": 45.13410568237305, "learning_rate": 1.665774685576666e-07, "loss": 0.2667, "step": 6229 }, { "epoch": 0.8335563286058335, "grad_norm": 47.32558822631836, "learning_rate": 1.6644367139416644e-07, "loss": 0.5116, "step": 6230 }, { "epoch": 0.8336901257693337, "grad_norm": 43.13581466674805, "learning_rate": 1.6630987423066632e-07, "loss": 0.3916, "step": 6231 }, { "epoch": 0.8338239229328338, "grad_norm": 41.0721321105957, "learning_rate": 1.6617607706716617e-07, "loss": 0.32, "step": 6232 }, { "epoch": 0.833957720096334, "grad_norm": 33.68947219848633, "learning_rate": 1.6604227990366603e-07, "loss": 0.395, "step": 6233 }, { "epoch": 0.8340915172598341, "grad_norm": 30.23662567138672, "learning_rate": 1.6590848274016588e-07, "loss": 0.3392, "step": 6234 }, { "epoch": 0.8342253144233343, "grad_norm": 40.34410095214844, "learning_rate": 1.6577468557666576e-07, "loss": 0.3174, "step": 6235 }, { "epoch": 0.8343591115868344, "grad_norm": 49.372623443603516, "learning_rate": 1.6564088841316564e-07, "loss": 0.4863, "step": 6236 }, { "epoch": 0.8344929087503344, "grad_norm": 62.542354583740234, "learning_rate": 1.655070912496655e-07, "loss": 0.396, "step": 6237 }, { "epoch": 0.8346267059138346, "grad_norm": 45.111480712890625, "learning_rate": 1.6537329408616537e-07, "loss": 0.5275, "step": 6238 }, { "epoch": 0.8347605030773347, "grad_norm": 36.83749771118164, "learning_rate": 1.6523949692266522e-07, "loss": 0.4451, "step": 6239 }, { "epoch": 0.8348943002408349, "grad_norm": 31.181129455566406, "learning_rate": 1.651056997591651e-07, "loss": 0.2916, "step": 6240 }, { "epoch": 0.835028097404335, "grad_norm": 21.994844436645508, "learning_rate": 1.6497190259566495e-07, "loss": 0.2081, "step": 6241 }, { "epoch": 0.8351618945678352, "grad_norm": 38.34349822998047, "learning_rate": 1.6483810543216483e-07, "loss": 0.3396, "step": 6242 }, { "epoch": 0.8352956917313353, "grad_norm": 49.20677947998047, "learning_rate": 1.6470430826866469e-07, "loss": 0.4154, "step": 6243 }, { "epoch": 0.8354294888948355, "grad_norm": 35.661537170410156, "learning_rate": 1.6457051110516456e-07, "loss": 0.3731, "step": 6244 }, { "epoch": 0.8355632860583355, "grad_norm": 72.37418365478516, "learning_rate": 1.6443671394166442e-07, "loss": 0.6936, "step": 6245 }, { "epoch": 0.8356970832218357, "grad_norm": 40.43620681762695, "learning_rate": 1.643029167781643e-07, "loss": 0.3879, "step": 6246 }, { "epoch": 0.8358308803853358, "grad_norm": 38.19389343261719, "learning_rate": 1.6416911961466418e-07, "loss": 0.3959, "step": 6247 }, { "epoch": 0.835964677548836, "grad_norm": 57.480316162109375, "learning_rate": 1.6403532245116403e-07, "loss": 0.5171, "step": 6248 }, { "epoch": 0.8360984747123361, "grad_norm": 33.64214324951172, "learning_rate": 1.639015252876639e-07, "loss": 0.3076, "step": 6249 }, { "epoch": 0.8362322718758363, "grad_norm": 50.50379180908203, "learning_rate": 1.6376772812416376e-07, "loss": 0.4231, "step": 6250 }, { "epoch": 0.8363660690393364, "grad_norm": 21.317720413208008, "learning_rate": 1.6363393096066364e-07, "loss": 0.1144, "step": 6251 }, { "epoch": 0.8364998662028365, "grad_norm": 38.18715286254883, "learning_rate": 1.635001337971635e-07, "loss": 0.3833, "step": 6252 }, { "epoch": 0.8366336633663366, "grad_norm": 43.29279327392578, "learning_rate": 1.6336633663366337e-07, "loss": 0.4418, "step": 6253 }, { "epoch": 0.8367674605298367, "grad_norm": 39.740177154541016, "learning_rate": 1.6323253947016323e-07, "loss": 0.2175, "step": 6254 }, { "epoch": 0.8369012576933369, "grad_norm": 21.853357315063477, "learning_rate": 1.630987423066631e-07, "loss": 0.2068, "step": 6255 }, { "epoch": 0.837035054856837, "grad_norm": 36.203487396240234, "learning_rate": 1.6296494514316296e-07, "loss": 0.4156, "step": 6256 }, { "epoch": 0.8371688520203372, "grad_norm": 38.55232620239258, "learning_rate": 1.6283114797966284e-07, "loss": 0.3189, "step": 6257 }, { "epoch": 0.8373026491838373, "grad_norm": 39.73656463623047, "learning_rate": 1.626973508161627e-07, "loss": 0.3031, "step": 6258 }, { "epoch": 0.8374364463473374, "grad_norm": 25.81041717529297, "learning_rate": 1.6256355365266254e-07, "loss": 0.2707, "step": 6259 }, { "epoch": 0.8375702435108375, "grad_norm": 47.631492614746094, "learning_rate": 1.6242975648916242e-07, "loss": 0.3922, "step": 6260 }, { "epoch": 0.8377040406743377, "grad_norm": 39.50155258178711, "learning_rate": 1.6229595932566227e-07, "loss": 0.4518, "step": 6261 }, { "epoch": 0.8378378378378378, "grad_norm": 50.50861358642578, "learning_rate": 1.6216216216216215e-07, "loss": 0.5727, "step": 6262 }, { "epoch": 0.837971635001338, "grad_norm": 38.597900390625, "learning_rate": 1.62028364998662e-07, "loss": 0.2686, "step": 6263 }, { "epoch": 0.8381054321648381, "grad_norm": 25.869787216186523, "learning_rate": 1.6189456783516189e-07, "loss": 0.2205, "step": 6264 }, { "epoch": 0.8382392293283383, "grad_norm": 25.557931900024414, "learning_rate": 1.6176077067166174e-07, "loss": 0.238, "step": 6265 }, { "epoch": 0.8383730264918384, "grad_norm": 29.84481430053711, "learning_rate": 1.6162697350816162e-07, "loss": 0.2083, "step": 6266 }, { "epoch": 0.8385068236553385, "grad_norm": 27.187318801879883, "learning_rate": 1.6149317634466147e-07, "loss": 0.1769, "step": 6267 }, { "epoch": 0.8386406208188386, "grad_norm": 40.26195526123047, "learning_rate": 1.6135937918116135e-07, "loss": 0.4312, "step": 6268 }, { "epoch": 0.8387744179823388, "grad_norm": 28.255990982055664, "learning_rate": 1.612255820176612e-07, "loss": 0.269, "step": 6269 }, { "epoch": 0.8389082151458389, "grad_norm": 43.607383728027344, "learning_rate": 1.6109178485416108e-07, "loss": 0.5681, "step": 6270 }, { "epoch": 0.8390420123093391, "grad_norm": 33.934104919433594, "learning_rate": 1.6095798769066096e-07, "loss": 0.3822, "step": 6271 }, { "epoch": 0.8391758094728392, "grad_norm": 27.50776481628418, "learning_rate": 1.6082419052716081e-07, "loss": 0.2541, "step": 6272 }, { "epoch": 0.8393096066363394, "grad_norm": 43.75215530395508, "learning_rate": 1.606903933636607e-07, "loss": 0.3815, "step": 6273 }, { "epoch": 0.8394434037998394, "grad_norm": 31.349924087524414, "learning_rate": 1.6055659620016055e-07, "loss": 0.2452, "step": 6274 }, { "epoch": 0.8395772009633395, "grad_norm": 44.729713439941406, "learning_rate": 1.6042279903666043e-07, "loss": 0.2184, "step": 6275 }, { "epoch": 0.8397109981268397, "grad_norm": 42.71576690673828, "learning_rate": 1.6028900187316028e-07, "loss": 0.3196, "step": 6276 }, { "epoch": 0.8398447952903398, "grad_norm": 47.12224197387695, "learning_rate": 1.6015520470966016e-07, "loss": 0.435, "step": 6277 }, { "epoch": 0.83997859245384, "grad_norm": 18.61286735534668, "learning_rate": 1.6002140754616e-07, "loss": 0.1593, "step": 6278 }, { "epoch": 0.8401123896173401, "grad_norm": 58.96697998046875, "learning_rate": 1.598876103826599e-07, "loss": 0.4382, "step": 6279 }, { "epoch": 0.8402461867808403, "grad_norm": 73.46147918701172, "learning_rate": 1.5975381321915974e-07, "loss": 0.4827, "step": 6280 }, { "epoch": 0.8403799839443403, "grad_norm": 27.110980987548828, "learning_rate": 1.5962001605565962e-07, "loss": 0.3293, "step": 6281 }, { "epoch": 0.8405137811078405, "grad_norm": 49.98105239868164, "learning_rate": 1.594862188921595e-07, "loss": 0.5275, "step": 6282 }, { "epoch": 0.8406475782713406, "grad_norm": 74.9874038696289, "learning_rate": 1.5935242172865935e-07, "loss": 0.6363, "step": 6283 }, { "epoch": 0.8407813754348408, "grad_norm": 36.34716796875, "learning_rate": 1.5921862456515923e-07, "loss": 0.4045, "step": 6284 }, { "epoch": 0.8409151725983409, "grad_norm": 36.85601806640625, "learning_rate": 1.5908482740165906e-07, "loss": 0.2877, "step": 6285 }, { "epoch": 0.8410489697618411, "grad_norm": 63.3618278503418, "learning_rate": 1.5895103023815894e-07, "loss": 0.7047, "step": 6286 }, { "epoch": 0.8411827669253412, "grad_norm": 48.59368896484375, "learning_rate": 1.588172330746588e-07, "loss": 0.5772, "step": 6287 }, { "epoch": 0.8413165640888414, "grad_norm": 16.70391845703125, "learning_rate": 1.5868343591115867e-07, "loss": 0.2204, "step": 6288 }, { "epoch": 0.8414503612523414, "grad_norm": 29.452566146850586, "learning_rate": 1.5854963874765852e-07, "loss": 0.2926, "step": 6289 }, { "epoch": 0.8415841584158416, "grad_norm": 22.74224090576172, "learning_rate": 1.584158415841584e-07, "loss": 0.2134, "step": 6290 }, { "epoch": 0.8417179555793417, "grad_norm": 41.80674362182617, "learning_rate": 1.5828204442065826e-07, "loss": 0.3798, "step": 6291 }, { "epoch": 0.8418517527428419, "grad_norm": 33.03679656982422, "learning_rate": 1.5814824725715813e-07, "loss": 0.3578, "step": 6292 }, { "epoch": 0.841985549906342, "grad_norm": 49.02288055419922, "learning_rate": 1.5801445009365801e-07, "loss": 0.4244, "step": 6293 }, { "epoch": 0.8421193470698422, "grad_norm": 32.893943786621094, "learning_rate": 1.5788065293015787e-07, "loss": 0.4171, "step": 6294 }, { "epoch": 0.8422531442333423, "grad_norm": 35.72271728515625, "learning_rate": 1.5774685576665775e-07, "loss": 0.3119, "step": 6295 }, { "epoch": 0.8423869413968423, "grad_norm": 45.639503479003906, "learning_rate": 1.576130586031576e-07, "loss": 0.2885, "step": 6296 }, { "epoch": 0.8425207385603425, "grad_norm": 28.993602752685547, "learning_rate": 1.5747926143965748e-07, "loss": 0.3309, "step": 6297 }, { "epoch": 0.8426545357238426, "grad_norm": 44.815025329589844, "learning_rate": 1.5734546427615733e-07, "loss": 0.383, "step": 6298 }, { "epoch": 0.8427883328873428, "grad_norm": 41.03893280029297, "learning_rate": 1.572116671126572e-07, "loss": 0.3151, "step": 6299 }, { "epoch": 0.8429221300508429, "grad_norm": 45.032920837402344, "learning_rate": 1.5707786994915706e-07, "loss": 0.5393, "step": 6300 }, { "epoch": 0.8430559272143431, "grad_norm": 47.79743576049805, "learning_rate": 1.5694407278565694e-07, "loss": 0.5098, "step": 6301 }, { "epoch": 0.8431897243778432, "grad_norm": 33.27236557006836, "learning_rate": 1.568102756221568e-07, "loss": 0.4683, "step": 6302 }, { "epoch": 0.8433235215413433, "grad_norm": 38.679134368896484, "learning_rate": 1.5667647845865667e-07, "loss": 0.5284, "step": 6303 }, { "epoch": 0.8434573187048434, "grad_norm": 37.239261627197266, "learning_rate": 1.5654268129515655e-07, "loss": 0.1753, "step": 6304 }, { "epoch": 0.8435911158683436, "grad_norm": 24.81366729736328, "learning_rate": 1.564088841316564e-07, "loss": 0.2497, "step": 6305 }, { "epoch": 0.8437249130318437, "grad_norm": 44.864234924316406, "learning_rate": 1.5627508696815629e-07, "loss": 0.6431, "step": 6306 }, { "epoch": 0.8438587101953439, "grad_norm": 24.333742141723633, "learning_rate": 1.5614128980465614e-07, "loss": 0.3141, "step": 6307 }, { "epoch": 0.843992507358844, "grad_norm": 53.38302993774414, "learning_rate": 1.5600749264115602e-07, "loss": 0.2837, "step": 6308 }, { "epoch": 0.8441263045223442, "grad_norm": 57.33583450317383, "learning_rate": 1.5587369547765587e-07, "loss": 0.6044, "step": 6309 }, { "epoch": 0.8442601016858443, "grad_norm": 18.776643753051758, "learning_rate": 1.5573989831415575e-07, "loss": 0.1604, "step": 6310 }, { "epoch": 0.8443938988493443, "grad_norm": 37.198360443115234, "learning_rate": 1.5560610115065558e-07, "loss": 0.3463, "step": 6311 }, { "epoch": 0.8445276960128445, "grad_norm": 46.60981750488281, "learning_rate": 1.5547230398715546e-07, "loss": 0.3037, "step": 6312 }, { "epoch": 0.8446614931763446, "grad_norm": 38.582252502441406, "learning_rate": 1.553385068236553e-07, "loss": 0.3027, "step": 6313 }, { "epoch": 0.8447952903398448, "grad_norm": 37.472408294677734, "learning_rate": 1.552047096601552e-07, "loss": 0.4045, "step": 6314 }, { "epoch": 0.8449290875033449, "grad_norm": 34.63056182861328, "learning_rate": 1.5507091249665507e-07, "loss": 0.2533, "step": 6315 }, { "epoch": 0.8450628846668451, "grad_norm": 57.74671173095703, "learning_rate": 1.5493711533315492e-07, "loss": 0.464, "step": 6316 }, { "epoch": 0.8451966818303452, "grad_norm": 40.61345291137695, "learning_rate": 1.548033181696548e-07, "loss": 0.4056, "step": 6317 }, { "epoch": 0.8453304789938453, "grad_norm": 34.993919372558594, "learning_rate": 1.5466952100615465e-07, "loss": 0.3646, "step": 6318 }, { "epoch": 0.8454642761573454, "grad_norm": 28.598995208740234, "learning_rate": 1.5453572384265453e-07, "loss": 0.3847, "step": 6319 }, { "epoch": 0.8455980733208456, "grad_norm": 64.64320373535156, "learning_rate": 1.5440192667915438e-07, "loss": 0.5775, "step": 6320 }, { "epoch": 0.8457318704843457, "grad_norm": 40.70043182373047, "learning_rate": 1.5426812951565426e-07, "loss": 0.3049, "step": 6321 }, { "epoch": 0.8458656676478459, "grad_norm": 22.577877044677734, "learning_rate": 1.5413433235215412e-07, "loss": 0.225, "step": 6322 }, { "epoch": 0.845999464811346, "grad_norm": 36.12928771972656, "learning_rate": 1.54000535188654e-07, "loss": 0.2615, "step": 6323 }, { "epoch": 0.8461332619748462, "grad_norm": 37.61874771118164, "learning_rate": 1.5386673802515385e-07, "loss": 0.3282, "step": 6324 }, { "epoch": 0.8462670591383463, "grad_norm": 23.008071899414062, "learning_rate": 1.5373294086165373e-07, "loss": 0.188, "step": 6325 }, { "epoch": 0.8464008563018464, "grad_norm": 54.26183319091797, "learning_rate": 1.535991436981536e-07, "loss": 0.2508, "step": 6326 }, { "epoch": 0.8465346534653465, "grad_norm": 40.59626388549805, "learning_rate": 1.5346534653465346e-07, "loss": 0.4459, "step": 6327 }, { "epoch": 0.8466684506288467, "grad_norm": 34.44415283203125, "learning_rate": 1.5333154937115334e-07, "loss": 0.2395, "step": 6328 }, { "epoch": 0.8468022477923468, "grad_norm": 25.149349212646484, "learning_rate": 1.531977522076532e-07, "loss": 0.3343, "step": 6329 }, { "epoch": 0.846936044955847, "grad_norm": 34.79765701293945, "learning_rate": 1.5306395504415307e-07, "loss": 0.2569, "step": 6330 }, { "epoch": 0.8470698421193471, "grad_norm": 34.075199127197266, "learning_rate": 1.5293015788065292e-07, "loss": 0.2993, "step": 6331 }, { "epoch": 0.8472036392828473, "grad_norm": 36.49971389770508, "learning_rate": 1.527963607171528e-07, "loss": 0.4016, "step": 6332 }, { "epoch": 0.8473374364463473, "grad_norm": 39.322059631347656, "learning_rate": 1.5266256355365266e-07, "loss": 0.3123, "step": 6333 }, { "epoch": 0.8474712336098474, "grad_norm": 43.861175537109375, "learning_rate": 1.5252876639015253e-07, "loss": 0.4566, "step": 6334 }, { "epoch": 0.8476050307733476, "grad_norm": 30.54399299621582, "learning_rate": 1.523949692266524e-07, "loss": 0.228, "step": 6335 }, { "epoch": 0.8477388279368477, "grad_norm": 41.49220657348633, "learning_rate": 1.5226117206315227e-07, "loss": 0.3998, "step": 6336 }, { "epoch": 0.8478726251003479, "grad_norm": 28.21817970275879, "learning_rate": 1.5212737489965215e-07, "loss": 0.1948, "step": 6337 }, { "epoch": 0.848006422263848, "grad_norm": 24.031341552734375, "learning_rate": 1.5199357773615197e-07, "loss": 0.3772, "step": 6338 }, { "epoch": 0.8481402194273482, "grad_norm": 42.586997985839844, "learning_rate": 1.5185978057265185e-07, "loss": 0.3931, "step": 6339 }, { "epoch": 0.8482740165908482, "grad_norm": 30.517908096313477, "learning_rate": 1.517259834091517e-07, "loss": 0.3453, "step": 6340 }, { "epoch": 0.8484078137543484, "grad_norm": 47.557151794433594, "learning_rate": 1.5159218624565158e-07, "loss": 0.4322, "step": 6341 }, { "epoch": 0.8485416109178485, "grad_norm": 44.286808013916016, "learning_rate": 1.5145838908215144e-07, "loss": 0.4863, "step": 6342 }, { "epoch": 0.8486754080813487, "grad_norm": 41.02444076538086, "learning_rate": 1.5132459191865132e-07, "loss": 0.3916, "step": 6343 }, { "epoch": 0.8488092052448488, "grad_norm": 35.747615814208984, "learning_rate": 1.5119079475515117e-07, "loss": 0.3242, "step": 6344 }, { "epoch": 0.848943002408349, "grad_norm": 49.4937744140625, "learning_rate": 1.5105699759165105e-07, "loss": 0.6888, "step": 6345 }, { "epoch": 0.8490767995718491, "grad_norm": 34.94804000854492, "learning_rate": 1.509232004281509e-07, "loss": 0.3896, "step": 6346 }, { "epoch": 0.8492105967353493, "grad_norm": 28.87388801574707, "learning_rate": 1.5078940326465078e-07, "loss": 0.359, "step": 6347 }, { "epoch": 0.8493443938988493, "grad_norm": 36.86366271972656, "learning_rate": 1.5065560610115066e-07, "loss": 0.2909, "step": 6348 }, { "epoch": 0.8494781910623495, "grad_norm": 49.42009735107422, "learning_rate": 1.505218089376505e-07, "loss": 0.5534, "step": 6349 }, { "epoch": 0.8496119882258496, "grad_norm": 50.5260009765625, "learning_rate": 1.503880117741504e-07, "loss": 0.3363, "step": 6350 }, { "epoch": 0.8497457853893498, "grad_norm": 26.584095001220703, "learning_rate": 1.5025421461065024e-07, "loss": 0.2228, "step": 6351 }, { "epoch": 0.8498795825528499, "grad_norm": 31.611003875732422, "learning_rate": 1.5012041744715012e-07, "loss": 0.4204, "step": 6352 }, { "epoch": 0.85001337971635, "grad_norm": 53.654754638671875, "learning_rate": 1.4998662028364998e-07, "loss": 0.6501, "step": 6353 }, { "epoch": 0.8501471768798502, "grad_norm": 35.13399124145508, "learning_rate": 1.4985282312014986e-07, "loss": 0.3844, "step": 6354 }, { "epoch": 0.8502809740433502, "grad_norm": 42.244300842285156, "learning_rate": 1.497190259566497e-07, "loss": 0.5548, "step": 6355 }, { "epoch": 0.8504147712068504, "grad_norm": 68.45309448242188, "learning_rate": 1.495852287931496e-07, "loss": 0.6193, "step": 6356 }, { "epoch": 0.8505485683703505, "grad_norm": 39.7880859375, "learning_rate": 1.4945143162964944e-07, "loss": 0.5526, "step": 6357 }, { "epoch": 0.8506823655338507, "grad_norm": 36.27630615234375, "learning_rate": 1.4931763446614932e-07, "loss": 0.4079, "step": 6358 }, { "epoch": 0.8508161626973508, "grad_norm": 33.78721237182617, "learning_rate": 1.491838373026492e-07, "loss": 0.4928, "step": 6359 }, { "epoch": 0.850949959860851, "grad_norm": 25.753128051757812, "learning_rate": 1.4905004013914905e-07, "loss": 0.2716, "step": 6360 }, { "epoch": 0.8510837570243511, "grad_norm": 48.503238677978516, "learning_rate": 1.4891624297564893e-07, "loss": 0.2052, "step": 6361 }, { "epoch": 0.8512175541878512, "grad_norm": 45.58009719848633, "learning_rate": 1.4878244581214878e-07, "loss": 0.4072, "step": 6362 }, { "epoch": 0.8513513513513513, "grad_norm": 48.3173713684082, "learning_rate": 1.4864864864864866e-07, "loss": 0.381, "step": 6363 }, { "epoch": 0.8514851485148515, "grad_norm": 34.36640548706055, "learning_rate": 1.485148514851485e-07, "loss": 0.2885, "step": 6364 }, { "epoch": 0.8516189456783516, "grad_norm": 56.73435974121094, "learning_rate": 1.4838105432164837e-07, "loss": 0.4104, "step": 6365 }, { "epoch": 0.8517527428418518, "grad_norm": 24.03229331970215, "learning_rate": 1.4824725715814822e-07, "loss": 0.2641, "step": 6366 }, { "epoch": 0.8518865400053519, "grad_norm": 40.2062873840332, "learning_rate": 1.481134599946481e-07, "loss": 0.4377, "step": 6367 }, { "epoch": 0.8520203371688521, "grad_norm": 28.725692749023438, "learning_rate": 1.4797966283114795e-07, "loss": 0.2877, "step": 6368 }, { "epoch": 0.8521541343323522, "grad_norm": 22.42500877380371, "learning_rate": 1.4784586566764783e-07, "loss": 0.2131, "step": 6369 }, { "epoch": 0.8522879314958522, "grad_norm": 48.911827087402344, "learning_rate": 1.477120685041477e-07, "loss": 0.4226, "step": 6370 }, { "epoch": 0.8524217286593524, "grad_norm": 44.4848747253418, "learning_rate": 1.4757827134064756e-07, "loss": 0.3624, "step": 6371 }, { "epoch": 0.8525555258228525, "grad_norm": 32.806331634521484, "learning_rate": 1.4744447417714744e-07, "loss": 0.4109, "step": 6372 }, { "epoch": 0.8526893229863527, "grad_norm": 26.06153678894043, "learning_rate": 1.473106770136473e-07, "loss": 0.2347, "step": 6373 }, { "epoch": 0.8528231201498528, "grad_norm": 29.94205093383789, "learning_rate": 1.4717687985014718e-07, "loss": 0.2575, "step": 6374 }, { "epoch": 0.852956917313353, "grad_norm": 48.04520034790039, "learning_rate": 1.4704308268664703e-07, "loss": 0.3973, "step": 6375 }, { "epoch": 0.8530907144768531, "grad_norm": 29.53171730041504, "learning_rate": 1.469092855231469e-07, "loss": 0.3967, "step": 6376 }, { "epoch": 0.8532245116403532, "grad_norm": 31.24090003967285, "learning_rate": 1.4677548835964676e-07, "loss": 0.3867, "step": 6377 }, { "epoch": 0.8533583088038533, "grad_norm": 40.1014289855957, "learning_rate": 1.4664169119614664e-07, "loss": 0.3987, "step": 6378 }, { "epoch": 0.8534921059673535, "grad_norm": 24.48758888244629, "learning_rate": 1.465078940326465e-07, "loss": 0.2531, "step": 6379 }, { "epoch": 0.8536259031308536, "grad_norm": 23.988784790039062, "learning_rate": 1.4637409686914637e-07, "loss": 0.232, "step": 6380 }, { "epoch": 0.8537597002943538, "grad_norm": 31.87835121154785, "learning_rate": 1.4624029970564625e-07, "loss": 0.3343, "step": 6381 }, { "epoch": 0.8538934974578539, "grad_norm": 43.07158279418945, "learning_rate": 1.461065025421461e-07, "loss": 0.3579, "step": 6382 }, { "epoch": 0.8540272946213541, "grad_norm": 35.526771545410156, "learning_rate": 1.4597270537864598e-07, "loss": 0.3149, "step": 6383 }, { "epoch": 0.8541610917848541, "grad_norm": 37.65574264526367, "learning_rate": 1.4583890821514584e-07, "loss": 0.2246, "step": 6384 }, { "epoch": 0.8542948889483543, "grad_norm": 32.70443344116211, "learning_rate": 1.4570511105164572e-07, "loss": 0.5238, "step": 6385 }, { "epoch": 0.8544286861118544, "grad_norm": 30.816455841064453, "learning_rate": 1.4557131388814557e-07, "loss": 0.3795, "step": 6386 }, { "epoch": 0.8545624832753546, "grad_norm": 36.12074279785156, "learning_rate": 1.4543751672464545e-07, "loss": 0.4027, "step": 6387 }, { "epoch": 0.8546962804388547, "grad_norm": 51.837852478027344, "learning_rate": 1.453037195611453e-07, "loss": 0.5697, "step": 6388 }, { "epoch": 0.8548300776023549, "grad_norm": 45.89976501464844, "learning_rate": 1.4516992239764518e-07, "loss": 0.3537, "step": 6389 }, { "epoch": 0.854963874765855, "grad_norm": 31.19024085998535, "learning_rate": 1.45036125234145e-07, "loss": 0.3329, "step": 6390 }, { "epoch": 0.8550976719293552, "grad_norm": 43.10216522216797, "learning_rate": 1.4490232807064489e-07, "loss": 0.6954, "step": 6391 }, { "epoch": 0.8552314690928552, "grad_norm": 36.496910095214844, "learning_rate": 1.4476853090714476e-07, "loss": 0.2318, "step": 6392 }, { "epoch": 0.8553652662563553, "grad_norm": 43.20918273925781, "learning_rate": 1.4463473374364462e-07, "loss": 0.3412, "step": 6393 }, { "epoch": 0.8554990634198555, "grad_norm": 42.66051483154297, "learning_rate": 1.445009365801445e-07, "loss": 0.3964, "step": 6394 }, { "epoch": 0.8556328605833556, "grad_norm": 44.1636848449707, "learning_rate": 1.4436713941664435e-07, "loss": 0.4536, "step": 6395 }, { "epoch": 0.8557666577468558, "grad_norm": 30.611852645874023, "learning_rate": 1.4423334225314423e-07, "loss": 0.1519, "step": 6396 }, { "epoch": 0.8559004549103559, "grad_norm": 41.58430862426758, "learning_rate": 1.4409954508964408e-07, "loss": 0.2347, "step": 6397 }, { "epoch": 0.8560342520738561, "grad_norm": 37.06181716918945, "learning_rate": 1.4396574792614396e-07, "loss": 0.378, "step": 6398 }, { "epoch": 0.8561680492373561, "grad_norm": 30.00678062438965, "learning_rate": 1.4383195076264381e-07, "loss": 0.3911, "step": 6399 }, { "epoch": 0.8563018464008563, "grad_norm": 42.7099723815918, "learning_rate": 1.436981535991437e-07, "loss": 0.5121, "step": 6400 }, { "epoch": 0.8564356435643564, "grad_norm": 31.847349166870117, "learning_rate": 1.4356435643564355e-07, "loss": 0.3851, "step": 6401 }, { "epoch": 0.8565694407278566, "grad_norm": 48.726463317871094, "learning_rate": 1.4343055927214342e-07, "loss": 0.3945, "step": 6402 }, { "epoch": 0.8567032378913567, "grad_norm": 47.92750549316406, "learning_rate": 1.4329676210864328e-07, "loss": 0.3502, "step": 6403 }, { "epoch": 0.8568370350548569, "grad_norm": 45.778411865234375, "learning_rate": 1.4316296494514316e-07, "loss": 0.4349, "step": 6404 }, { "epoch": 0.856970832218357, "grad_norm": 31.366722106933594, "learning_rate": 1.4302916778164304e-07, "loss": 0.3151, "step": 6405 }, { "epoch": 0.857104629381857, "grad_norm": 36.26365280151367, "learning_rate": 1.428953706181429e-07, "loss": 0.3369, "step": 6406 }, { "epoch": 0.8572384265453572, "grad_norm": 33.1202278137207, "learning_rate": 1.4276157345464277e-07, "loss": 0.3313, "step": 6407 }, { "epoch": 0.8573722237088574, "grad_norm": 53.59624099731445, "learning_rate": 1.4262777629114262e-07, "loss": 0.4875, "step": 6408 }, { "epoch": 0.8575060208723575, "grad_norm": 62.67483139038086, "learning_rate": 1.424939791276425e-07, "loss": 0.3868, "step": 6409 }, { "epoch": 0.8576398180358576, "grad_norm": 23.7879638671875, "learning_rate": 1.4236018196414235e-07, "loss": 0.1488, "step": 6410 }, { "epoch": 0.8577736151993578, "grad_norm": 36.94559860229492, "learning_rate": 1.4222638480064223e-07, "loss": 0.4247, "step": 6411 }, { "epoch": 0.857907412362858, "grad_norm": 37.706905364990234, "learning_rate": 1.4209258763714209e-07, "loss": 0.2922, "step": 6412 }, { "epoch": 0.8580412095263581, "grad_norm": 29.90536880493164, "learning_rate": 1.4195879047364196e-07, "loss": 0.3219, "step": 6413 }, { "epoch": 0.8581750066898581, "grad_norm": 36.81233596801758, "learning_rate": 1.4182499331014182e-07, "loss": 0.2495, "step": 6414 }, { "epoch": 0.8583088038533583, "grad_norm": 39.25513458251953, "learning_rate": 1.416911961466417e-07, "loss": 0.4355, "step": 6415 }, { "epoch": 0.8584426010168584, "grad_norm": 34.92658233642578, "learning_rate": 1.4155739898314158e-07, "loss": 0.301, "step": 6416 }, { "epoch": 0.8585763981803586, "grad_norm": 28.901885986328125, "learning_rate": 1.414236018196414e-07, "loss": 0.2407, "step": 6417 }, { "epoch": 0.8587101953438587, "grad_norm": 45.2744026184082, "learning_rate": 1.4128980465614128e-07, "loss": 0.3505, "step": 6418 }, { "epoch": 0.8588439925073589, "grad_norm": 33.26668930053711, "learning_rate": 1.4115600749264113e-07, "loss": 0.3201, "step": 6419 }, { "epoch": 0.858977789670859, "grad_norm": 32.875701904296875, "learning_rate": 1.4102221032914101e-07, "loss": 0.3077, "step": 6420 }, { "epoch": 0.8591115868343591, "grad_norm": 80.58399200439453, "learning_rate": 1.4088841316564087e-07, "loss": 0.5672, "step": 6421 }, { "epoch": 0.8592453839978592, "grad_norm": 52.89815902709961, "learning_rate": 1.4075461600214075e-07, "loss": 0.3456, "step": 6422 }, { "epoch": 0.8593791811613594, "grad_norm": 55.07950210571289, "learning_rate": 1.406208188386406e-07, "loss": 0.6807, "step": 6423 }, { "epoch": 0.8595129783248595, "grad_norm": 31.24540138244629, "learning_rate": 1.4048702167514048e-07, "loss": 0.2543, "step": 6424 }, { "epoch": 0.8596467754883597, "grad_norm": 48.47054672241211, "learning_rate": 1.4035322451164033e-07, "loss": 0.3701, "step": 6425 }, { "epoch": 0.8597805726518598, "grad_norm": 25.540306091308594, "learning_rate": 1.402194273481402e-07, "loss": 0.2885, "step": 6426 }, { "epoch": 0.85991436981536, "grad_norm": 54.930503845214844, "learning_rate": 1.400856301846401e-07, "loss": 0.4503, "step": 6427 }, { "epoch": 0.86004816697886, "grad_norm": 22.883630752563477, "learning_rate": 1.3995183302113994e-07, "loss": 0.2099, "step": 6428 }, { "epoch": 0.8601819641423601, "grad_norm": 50.792728424072266, "learning_rate": 1.3981803585763982e-07, "loss": 0.6161, "step": 6429 }, { "epoch": 0.8603157613058603, "grad_norm": 30.317150115966797, "learning_rate": 1.3968423869413967e-07, "loss": 0.2934, "step": 6430 }, { "epoch": 0.8604495584693604, "grad_norm": 43.03192901611328, "learning_rate": 1.3955044153063955e-07, "loss": 0.496, "step": 6431 }, { "epoch": 0.8605833556328606, "grad_norm": 31.12335968017578, "learning_rate": 1.394166443671394e-07, "loss": 0.2708, "step": 6432 }, { "epoch": 0.8607171527963607, "grad_norm": 24.699623107910156, "learning_rate": 1.3928284720363929e-07, "loss": 0.2645, "step": 6433 }, { "epoch": 0.8608509499598609, "grad_norm": 25.03025245666504, "learning_rate": 1.3914905004013914e-07, "loss": 0.3432, "step": 6434 }, { "epoch": 0.860984747123361, "grad_norm": 33.365989685058594, "learning_rate": 1.3901525287663902e-07, "loss": 0.2256, "step": 6435 }, { "epoch": 0.8611185442868611, "grad_norm": 40.2624626159668, "learning_rate": 1.3888145571313887e-07, "loss": 0.328, "step": 6436 }, { "epoch": 0.8612523414503612, "grad_norm": 51.56748962402344, "learning_rate": 1.3874765854963875e-07, "loss": 0.3868, "step": 6437 }, { "epoch": 0.8613861386138614, "grad_norm": 54.35451126098633, "learning_rate": 1.3861386138613863e-07, "loss": 0.4989, "step": 6438 }, { "epoch": 0.8615199357773615, "grad_norm": 31.61221694946289, "learning_rate": 1.3848006422263848e-07, "loss": 0.2656, "step": 6439 }, { "epoch": 0.8616537329408617, "grad_norm": 45.37440490722656, "learning_rate": 1.3834626705913836e-07, "loss": 0.2636, "step": 6440 }, { "epoch": 0.8617875301043618, "grad_norm": 77.12185668945312, "learning_rate": 1.3821246989563821e-07, "loss": 0.4587, "step": 6441 }, { "epoch": 0.861921327267862, "grad_norm": 49.953155517578125, "learning_rate": 1.380786727321381e-07, "loss": 0.4156, "step": 6442 }, { "epoch": 0.862055124431362, "grad_norm": 38.29004669189453, "learning_rate": 1.3794487556863792e-07, "loss": 0.2264, "step": 6443 }, { "epoch": 0.8621889215948622, "grad_norm": 14.947881698608398, "learning_rate": 1.378110784051378e-07, "loss": 0.1612, "step": 6444 }, { "epoch": 0.8623227187583623, "grad_norm": 43.69098663330078, "learning_rate": 1.3767728124163765e-07, "loss": 0.2721, "step": 6445 }, { "epoch": 0.8624565159218625, "grad_norm": 36.42863845825195, "learning_rate": 1.3754348407813753e-07, "loss": 0.3876, "step": 6446 }, { "epoch": 0.8625903130853626, "grad_norm": 32.886112213134766, "learning_rate": 1.3740968691463738e-07, "loss": 0.3253, "step": 6447 }, { "epoch": 0.8627241102488628, "grad_norm": 34.45376968383789, "learning_rate": 1.3727588975113726e-07, "loss": 0.3572, "step": 6448 }, { "epoch": 0.8628579074123629, "grad_norm": 36.129520416259766, "learning_rate": 1.3714209258763714e-07, "loss": 0.3727, "step": 6449 }, { "epoch": 0.8629917045758629, "grad_norm": 46.169830322265625, "learning_rate": 1.37008295424137e-07, "loss": 0.4142, "step": 6450 }, { "epoch": 0.8631255017393631, "grad_norm": 48.077064514160156, "learning_rate": 1.3687449826063687e-07, "loss": 0.4907, "step": 6451 }, { "epoch": 0.8632592989028632, "grad_norm": 35.38225173950195, "learning_rate": 1.3674070109713673e-07, "loss": 0.3522, "step": 6452 }, { "epoch": 0.8633930960663634, "grad_norm": 42.021915435791016, "learning_rate": 1.366069039336366e-07, "loss": 0.3569, "step": 6453 }, { "epoch": 0.8635268932298635, "grad_norm": 32.984535217285156, "learning_rate": 1.3647310677013646e-07, "loss": 0.2037, "step": 6454 }, { "epoch": 0.8636606903933637, "grad_norm": 66.06759643554688, "learning_rate": 1.3633930960663634e-07, "loss": 0.6405, "step": 6455 }, { "epoch": 0.8637944875568638, "grad_norm": 42.00298309326172, "learning_rate": 1.362055124431362e-07, "loss": 0.4495, "step": 6456 }, { "epoch": 0.863928284720364, "grad_norm": 41.348121643066406, "learning_rate": 1.3607171527963607e-07, "loss": 0.4039, "step": 6457 }, { "epoch": 0.864062081883864, "grad_norm": 56.734867095947266, "learning_rate": 1.3593791811613592e-07, "loss": 0.4779, "step": 6458 }, { "epoch": 0.8641958790473642, "grad_norm": 21.133338928222656, "learning_rate": 1.358041209526358e-07, "loss": 0.2336, "step": 6459 }, { "epoch": 0.8643296762108643, "grad_norm": 71.66407012939453, "learning_rate": 1.3567032378913568e-07, "loss": 0.6603, "step": 6460 }, { "epoch": 0.8644634733743645, "grad_norm": 42.78475570678711, "learning_rate": 1.3553652662563553e-07, "loss": 0.339, "step": 6461 }, { "epoch": 0.8645972705378646, "grad_norm": 78.33342742919922, "learning_rate": 1.3540272946213541e-07, "loss": 0.6076, "step": 6462 }, { "epoch": 0.8647310677013648, "grad_norm": 63.27305221557617, "learning_rate": 1.3526893229863527e-07, "loss": 0.8001, "step": 6463 }, { "epoch": 0.8648648648648649, "grad_norm": 44.362998962402344, "learning_rate": 1.3513513513513515e-07, "loss": 0.3687, "step": 6464 }, { "epoch": 0.864998662028365, "grad_norm": 56.886817932128906, "learning_rate": 1.35001337971635e-07, "loss": 0.4258, "step": 6465 }, { "epoch": 0.8651324591918651, "grad_norm": 22.034997940063477, "learning_rate": 1.3486754080813488e-07, "loss": 0.2318, "step": 6466 }, { "epoch": 0.8652662563553652, "grad_norm": 57.4190559387207, "learning_rate": 1.3473374364463473e-07, "loss": 0.6058, "step": 6467 }, { "epoch": 0.8654000535188654, "grad_norm": 58.5720100402832, "learning_rate": 1.345999464811346e-07, "loss": 0.4636, "step": 6468 }, { "epoch": 0.8655338506823655, "grad_norm": 26.378223419189453, "learning_rate": 1.3446614931763444e-07, "loss": 0.245, "step": 6469 }, { "epoch": 0.8656676478458657, "grad_norm": 51.666358947753906, "learning_rate": 1.3433235215413432e-07, "loss": 0.4848, "step": 6470 }, { "epoch": 0.8658014450093658, "grad_norm": 40.76968765258789, "learning_rate": 1.341985549906342e-07, "loss": 0.3319, "step": 6471 }, { "epoch": 0.8659352421728659, "grad_norm": 36.467708587646484, "learning_rate": 1.3406475782713405e-07, "loss": 0.4038, "step": 6472 }, { "epoch": 0.866069039336366, "grad_norm": 28.07000160217285, "learning_rate": 1.3393096066363393e-07, "loss": 0.1977, "step": 6473 }, { "epoch": 0.8662028364998662, "grad_norm": 43.67180252075195, "learning_rate": 1.3379716350013378e-07, "loss": 0.4825, "step": 6474 }, { "epoch": 0.8663366336633663, "grad_norm": 38.398746490478516, "learning_rate": 1.3366336633663366e-07, "loss": 0.4618, "step": 6475 }, { "epoch": 0.8664704308268665, "grad_norm": 29.58359718322754, "learning_rate": 1.335295691731335e-07, "loss": 0.2301, "step": 6476 }, { "epoch": 0.8666042279903666, "grad_norm": 49.32339096069336, "learning_rate": 1.333957720096334e-07, "loss": 0.4586, "step": 6477 }, { "epoch": 0.8667380251538668, "grad_norm": 49.97959518432617, "learning_rate": 1.3326197484613324e-07, "loss": 0.5543, "step": 6478 }, { "epoch": 0.8668718223173669, "grad_norm": 42.6088752746582, "learning_rate": 1.3312817768263312e-07, "loss": 0.4085, "step": 6479 }, { "epoch": 0.867005619480867, "grad_norm": 33.220218658447266, "learning_rate": 1.3299438051913298e-07, "loss": 0.3916, "step": 6480 }, { "epoch": 0.8671394166443671, "grad_norm": 25.44434928894043, "learning_rate": 1.3286058335563285e-07, "loss": 0.2596, "step": 6481 }, { "epoch": 0.8672732138078673, "grad_norm": 35.33567810058594, "learning_rate": 1.3272678619213273e-07, "loss": 0.456, "step": 6482 }, { "epoch": 0.8674070109713674, "grad_norm": 20.769445419311523, "learning_rate": 1.325929890286326e-07, "loss": 0.1636, "step": 6483 }, { "epoch": 0.8675408081348676, "grad_norm": 50.170616149902344, "learning_rate": 1.3245919186513247e-07, "loss": 0.3633, "step": 6484 }, { "epoch": 0.8676746052983677, "grad_norm": 43.983421325683594, "learning_rate": 1.3232539470163232e-07, "loss": 0.3103, "step": 6485 }, { "epoch": 0.8678084024618679, "grad_norm": 30.5809383392334, "learning_rate": 1.321915975381322e-07, "loss": 0.2469, "step": 6486 }, { "epoch": 0.8679421996253679, "grad_norm": 47.08807373046875, "learning_rate": 1.3205780037463205e-07, "loss": 0.4367, "step": 6487 }, { "epoch": 0.868075996788868, "grad_norm": 55.055320739746094, "learning_rate": 1.3192400321113193e-07, "loss": 0.4367, "step": 6488 }, { "epoch": 0.8682097939523682, "grad_norm": 61.707130432128906, "learning_rate": 1.3179020604763178e-07, "loss": 0.4723, "step": 6489 }, { "epoch": 0.8683435911158683, "grad_norm": 55.49782943725586, "learning_rate": 1.3165640888413166e-07, "loss": 0.8327, "step": 6490 }, { "epoch": 0.8684773882793685, "grad_norm": 32.0633430480957, "learning_rate": 1.3152261172063152e-07, "loss": 0.1712, "step": 6491 }, { "epoch": 0.8686111854428686, "grad_norm": 35.20212173461914, "learning_rate": 1.313888145571314e-07, "loss": 0.3632, "step": 6492 }, { "epoch": 0.8687449826063688, "grad_norm": 29.68246078491211, "learning_rate": 1.3125501739363127e-07, "loss": 0.315, "step": 6493 }, { "epoch": 0.8688787797698688, "grad_norm": 51.42009735107422, "learning_rate": 1.311212202301311e-07, "loss": 0.544, "step": 6494 }, { "epoch": 0.869012576933369, "grad_norm": 53.763282775878906, "learning_rate": 1.3098742306663098e-07, "loss": 0.6278, "step": 6495 }, { "epoch": 0.8691463740968691, "grad_norm": 61.177955627441406, "learning_rate": 1.3085362590313083e-07, "loss": 0.5274, "step": 6496 }, { "epoch": 0.8692801712603693, "grad_norm": 19.52349090576172, "learning_rate": 1.307198287396307e-07, "loss": 0.125, "step": 6497 }, { "epoch": 0.8694139684238694, "grad_norm": 45.45930862426758, "learning_rate": 1.3058603157613056e-07, "loss": 0.6564, "step": 6498 }, { "epoch": 0.8695477655873696, "grad_norm": 42.00468063354492, "learning_rate": 1.3045223441263044e-07, "loss": 0.3686, "step": 6499 }, { "epoch": 0.8696815627508697, "grad_norm": 42.4316520690918, "learning_rate": 1.303184372491303e-07, "loss": 0.3893, "step": 6500 }, { "epoch": 0.8698153599143699, "grad_norm": 35.393978118896484, "learning_rate": 1.3018464008563018e-07, "loss": 0.3236, "step": 6501 }, { "epoch": 0.8699491570778699, "grad_norm": 34.9990234375, "learning_rate": 1.3005084292213003e-07, "loss": 0.1992, "step": 6502 }, { "epoch": 0.87008295424137, "grad_norm": 43.32398986816406, "learning_rate": 1.299170457586299e-07, "loss": 0.3504, "step": 6503 }, { "epoch": 0.8702167514048702, "grad_norm": 42.15196228027344, "learning_rate": 1.297832485951298e-07, "loss": 0.4273, "step": 6504 }, { "epoch": 0.8703505485683704, "grad_norm": 28.396608352661133, "learning_rate": 1.2964945143162964e-07, "loss": 0.2578, "step": 6505 }, { "epoch": 0.8704843457318705, "grad_norm": 26.154064178466797, "learning_rate": 1.2951565426812952e-07, "loss": 0.3927, "step": 6506 }, { "epoch": 0.8706181428953707, "grad_norm": 34.64122772216797, "learning_rate": 1.2938185710462937e-07, "loss": 0.3756, "step": 6507 }, { "epoch": 0.8707519400588708, "grad_norm": 40.65627670288086, "learning_rate": 1.2924805994112925e-07, "loss": 0.285, "step": 6508 }, { "epoch": 0.8708857372223708, "grad_norm": 53.709896087646484, "learning_rate": 1.291142627776291e-07, "loss": 0.6886, "step": 6509 }, { "epoch": 0.871019534385871, "grad_norm": 42.04459762573242, "learning_rate": 1.2898046561412898e-07, "loss": 0.3278, "step": 6510 }, { "epoch": 0.8711533315493711, "grad_norm": 43.18743896484375, "learning_rate": 1.2884666845062884e-07, "loss": 0.2968, "step": 6511 }, { "epoch": 0.8712871287128713, "grad_norm": 35.98674392700195, "learning_rate": 1.2871287128712872e-07, "loss": 0.3156, "step": 6512 }, { "epoch": 0.8714209258763714, "grad_norm": 38.811180114746094, "learning_rate": 1.2857907412362857e-07, "loss": 0.2799, "step": 6513 }, { "epoch": 0.8715547230398716, "grad_norm": 40.56391143798828, "learning_rate": 1.2844527696012845e-07, "loss": 0.4013, "step": 6514 }, { "epoch": 0.8716885202033717, "grad_norm": 31.376392364501953, "learning_rate": 1.2831147979662833e-07, "loss": 0.3715, "step": 6515 }, { "epoch": 0.8718223173668718, "grad_norm": 33.63273239135742, "learning_rate": 1.2817768263312818e-07, "loss": 0.2325, "step": 6516 }, { "epoch": 0.8719561145303719, "grad_norm": 37.9571533203125, "learning_rate": 1.2804388546962806e-07, "loss": 0.1947, "step": 6517 }, { "epoch": 0.8720899116938721, "grad_norm": 31.58262825012207, "learning_rate": 1.279100883061279e-07, "loss": 0.2989, "step": 6518 }, { "epoch": 0.8722237088573722, "grad_norm": 47.85296630859375, "learning_rate": 1.277762911426278e-07, "loss": 0.4271, "step": 6519 }, { "epoch": 0.8723575060208724, "grad_norm": 34.30717468261719, "learning_rate": 1.2764249397912762e-07, "loss": 0.3714, "step": 6520 }, { "epoch": 0.8724913031843725, "grad_norm": 38.53110122680664, "learning_rate": 1.275086968156275e-07, "loss": 0.3341, "step": 6521 }, { "epoch": 0.8726251003478727, "grad_norm": 39.18960189819336, "learning_rate": 1.2737489965212735e-07, "loss": 0.2742, "step": 6522 }, { "epoch": 0.8727588975113728, "grad_norm": 52.616485595703125, "learning_rate": 1.2724110248862723e-07, "loss": 0.3828, "step": 6523 }, { "epoch": 0.8728926946748728, "grad_norm": 46.90449905395508, "learning_rate": 1.2710730532512708e-07, "loss": 0.362, "step": 6524 }, { "epoch": 0.873026491838373, "grad_norm": 31.66071319580078, "learning_rate": 1.2697350816162696e-07, "loss": 0.2486, "step": 6525 }, { "epoch": 0.8731602890018731, "grad_norm": 54.88965606689453, "learning_rate": 1.2683971099812684e-07, "loss": 0.571, "step": 6526 }, { "epoch": 0.8732940861653733, "grad_norm": 37.426116943359375, "learning_rate": 1.267059138346267e-07, "loss": 0.2488, "step": 6527 }, { "epoch": 0.8734278833288734, "grad_norm": 60.21939468383789, "learning_rate": 1.2657211667112657e-07, "loss": 0.3589, "step": 6528 }, { "epoch": 0.8735616804923736, "grad_norm": 33.82784652709961, "learning_rate": 1.2643831950762642e-07, "loss": 0.224, "step": 6529 }, { "epoch": 0.8736954776558737, "grad_norm": 50.13994216918945, "learning_rate": 1.263045223441263e-07, "loss": 0.3944, "step": 6530 }, { "epoch": 0.8738292748193738, "grad_norm": 39.37262725830078, "learning_rate": 1.2617072518062616e-07, "loss": 0.3288, "step": 6531 }, { "epoch": 0.8739630719828739, "grad_norm": 34.37504196166992, "learning_rate": 1.2603692801712604e-07, "loss": 0.2838, "step": 6532 }, { "epoch": 0.8740968691463741, "grad_norm": 67.80683135986328, "learning_rate": 1.259031308536259e-07, "loss": 0.7597, "step": 6533 }, { "epoch": 0.8742306663098742, "grad_norm": 23.807100296020508, "learning_rate": 1.2576933369012577e-07, "loss": 0.2878, "step": 6534 }, { "epoch": 0.8743644634733744, "grad_norm": 39.39886474609375, "learning_rate": 1.2563553652662562e-07, "loss": 0.255, "step": 6535 }, { "epoch": 0.8744982606368745, "grad_norm": 31.500625610351562, "learning_rate": 1.255017393631255e-07, "loss": 0.2888, "step": 6536 }, { "epoch": 0.8746320578003747, "grad_norm": 41.45193099975586, "learning_rate": 1.2536794219962538e-07, "loss": 0.5634, "step": 6537 }, { "epoch": 0.8747658549638747, "grad_norm": 50.66679000854492, "learning_rate": 1.2523414503612523e-07, "loss": 0.3712, "step": 6538 }, { "epoch": 0.8748996521273749, "grad_norm": 56.48086166381836, "learning_rate": 1.251003478726251e-07, "loss": 0.6696, "step": 6539 }, { "epoch": 0.875033449290875, "grad_norm": 59.42985916137695, "learning_rate": 1.2496655070912496e-07, "loss": 0.5866, "step": 6540 }, { "epoch": 0.8751672464543752, "grad_norm": 37.82404327392578, "learning_rate": 1.2483275354562482e-07, "loss": 0.1653, "step": 6541 }, { "epoch": 0.8753010436178753, "grad_norm": 29.630752563476562, "learning_rate": 1.246989563821247e-07, "loss": 0.254, "step": 6542 }, { "epoch": 0.8754348407813755, "grad_norm": 64.58856201171875, "learning_rate": 1.2456515921862458e-07, "loss": 0.5322, "step": 6543 }, { "epoch": 0.8755686379448756, "grad_norm": 29.433435440063477, "learning_rate": 1.2443136205512443e-07, "loss": 0.198, "step": 6544 }, { "epoch": 0.8757024351083758, "grad_norm": 37.01191329956055, "learning_rate": 1.242975648916243e-07, "loss": 0.3259, "step": 6545 }, { "epoch": 0.8758362322718758, "grad_norm": 23.86490821838379, "learning_rate": 1.2416376772812416e-07, "loss": 0.2356, "step": 6546 }, { "epoch": 0.8759700294353759, "grad_norm": 54.11637496948242, "learning_rate": 1.24029970564624e-07, "loss": 0.4676, "step": 6547 }, { "epoch": 0.8761038265988761, "grad_norm": 55.49176788330078, "learning_rate": 1.238961734011239e-07, "loss": 0.3717, "step": 6548 }, { "epoch": 0.8762376237623762, "grad_norm": 30.15744400024414, "learning_rate": 1.2376237623762375e-07, "loss": 0.3379, "step": 6549 }, { "epoch": 0.8763714209258764, "grad_norm": 29.192739486694336, "learning_rate": 1.2362857907412362e-07, "loss": 0.3454, "step": 6550 }, { "epoch": 0.8765052180893765, "grad_norm": 34.94120788574219, "learning_rate": 1.2349478191062348e-07, "loss": 0.2069, "step": 6551 }, { "epoch": 0.8766390152528767, "grad_norm": 30.343599319458008, "learning_rate": 1.2336098474712336e-07, "loss": 0.3202, "step": 6552 }, { "epoch": 0.8767728124163767, "grad_norm": 38.864654541015625, "learning_rate": 1.232271875836232e-07, "loss": 0.3206, "step": 6553 }, { "epoch": 0.8769066095798769, "grad_norm": 58.807533264160156, "learning_rate": 1.230933904201231e-07, "loss": 0.5834, "step": 6554 }, { "epoch": 0.877040406743377, "grad_norm": 22.409292221069336, "learning_rate": 1.2295959325662297e-07, "loss": 0.2156, "step": 6555 }, { "epoch": 0.8771742039068772, "grad_norm": 57.49614334106445, "learning_rate": 1.2282579609312282e-07, "loss": 0.5048, "step": 6556 }, { "epoch": 0.8773080010703773, "grad_norm": 27.892791748046875, "learning_rate": 1.226919989296227e-07, "loss": 0.2302, "step": 6557 }, { "epoch": 0.8774417982338775, "grad_norm": 87.10271453857422, "learning_rate": 1.2255820176612255e-07, "loss": 0.7038, "step": 6558 }, { "epoch": 0.8775755953973776, "grad_norm": 39.254825592041016, "learning_rate": 1.2242440460262243e-07, "loss": 0.4216, "step": 6559 }, { "epoch": 0.8777093925608777, "grad_norm": 54.38613510131836, "learning_rate": 1.2229060743912228e-07, "loss": 0.3724, "step": 6560 }, { "epoch": 0.8778431897243778, "grad_norm": 21.477624893188477, "learning_rate": 1.2215681027562214e-07, "loss": 0.2006, "step": 6561 }, { "epoch": 0.877976986887878, "grad_norm": 36.831787109375, "learning_rate": 1.2202301311212202e-07, "loss": 0.3395, "step": 6562 }, { "epoch": 0.8781107840513781, "grad_norm": 33.6473274230957, "learning_rate": 1.2188921594862187e-07, "loss": 0.3849, "step": 6563 }, { "epoch": 0.8782445812148783, "grad_norm": 53.753028869628906, "learning_rate": 1.2175541878512175e-07, "loss": 0.524, "step": 6564 }, { "epoch": 0.8783783783783784, "grad_norm": 51.08613586425781, "learning_rate": 1.2162162162162163e-07, "loss": 0.5559, "step": 6565 }, { "epoch": 0.8785121755418785, "grad_norm": 44.02448272705078, "learning_rate": 1.2148782445812148e-07, "loss": 0.4048, "step": 6566 }, { "epoch": 0.8786459727053787, "grad_norm": 33.48659133911133, "learning_rate": 1.2135402729462136e-07, "loss": 0.2104, "step": 6567 }, { "epoch": 0.8787797698688787, "grad_norm": 42.41632843017578, "learning_rate": 1.212202301311212e-07, "loss": 0.5232, "step": 6568 }, { "epoch": 0.8789135670323789, "grad_norm": 52.56190490722656, "learning_rate": 1.210864329676211e-07, "loss": 0.2609, "step": 6569 }, { "epoch": 0.879047364195879, "grad_norm": 33.05525207519531, "learning_rate": 1.2095263580412095e-07, "loss": 0.369, "step": 6570 }, { "epoch": 0.8791811613593792, "grad_norm": 38.455177307128906, "learning_rate": 1.2081883864062082e-07, "loss": 0.3933, "step": 6571 }, { "epoch": 0.8793149585228793, "grad_norm": 27.566396713256836, "learning_rate": 1.2068504147712068e-07, "loss": 0.4468, "step": 6572 }, { "epoch": 0.8794487556863795, "grad_norm": 38.177650451660156, "learning_rate": 1.2055124431362053e-07, "loss": 0.3953, "step": 6573 }, { "epoch": 0.8795825528498796, "grad_norm": 20.61552619934082, "learning_rate": 1.204174471501204e-07, "loss": 0.1606, "step": 6574 }, { "epoch": 0.8797163500133797, "grad_norm": 34.64502716064453, "learning_rate": 1.2028364998662026e-07, "loss": 0.3463, "step": 6575 }, { "epoch": 0.8798501471768798, "grad_norm": 38.49555587768555, "learning_rate": 1.2014985282312014e-07, "loss": 0.6049, "step": 6576 }, { "epoch": 0.87998394434038, "grad_norm": 40.3445930480957, "learning_rate": 1.2001605565962002e-07, "loss": 0.3086, "step": 6577 }, { "epoch": 0.8801177415038801, "grad_norm": 46.014705657958984, "learning_rate": 1.1988225849611987e-07, "loss": 0.5268, "step": 6578 }, { "epoch": 0.8802515386673803, "grad_norm": 38.745853424072266, "learning_rate": 1.1974846133261975e-07, "loss": 0.2233, "step": 6579 }, { "epoch": 0.8803853358308804, "grad_norm": 38.066253662109375, "learning_rate": 1.196146641691196e-07, "loss": 0.309, "step": 6580 }, { "epoch": 0.8805191329943806, "grad_norm": 43.422706604003906, "learning_rate": 1.1948086700561948e-07, "loss": 0.3253, "step": 6581 }, { "epoch": 0.8806529301578806, "grad_norm": 44.83428192138672, "learning_rate": 1.1934706984211934e-07, "loss": 0.3185, "step": 6582 }, { "epoch": 0.8807867273213807, "grad_norm": 48.31121063232422, "learning_rate": 1.1921327267861922e-07, "loss": 0.5228, "step": 6583 }, { "epoch": 0.8809205244848809, "grad_norm": 36.8511848449707, "learning_rate": 1.1907947551511907e-07, "loss": 0.2733, "step": 6584 }, { "epoch": 0.881054321648381, "grad_norm": 62.584716796875, "learning_rate": 1.1894567835161894e-07, "loss": 0.4576, "step": 6585 }, { "epoch": 0.8811881188118812, "grad_norm": 31.994924545288086, "learning_rate": 1.188118811881188e-07, "loss": 0.307, "step": 6586 }, { "epoch": 0.8813219159753813, "grad_norm": 60.56879806518555, "learning_rate": 1.1867808402461867e-07, "loss": 0.5671, "step": 6587 }, { "epoch": 0.8814557131388815, "grad_norm": 58.513763427734375, "learning_rate": 1.1854428686111855e-07, "loss": 0.487, "step": 6588 }, { "epoch": 0.8815895103023816, "grad_norm": 25.830421447753906, "learning_rate": 1.1841048969761841e-07, "loss": 0.2253, "step": 6589 }, { "epoch": 0.8817233074658817, "grad_norm": 45.459110260009766, "learning_rate": 1.1827669253411828e-07, "loss": 0.4072, "step": 6590 }, { "epoch": 0.8818571046293818, "grad_norm": 57.727561950683594, "learning_rate": 1.1814289537061815e-07, "loss": 0.5691, "step": 6591 }, { "epoch": 0.881990901792882, "grad_norm": 45.255367279052734, "learning_rate": 1.1800909820711801e-07, "loss": 0.3717, "step": 6592 }, { "epoch": 0.8821246989563821, "grad_norm": 44.46246337890625, "learning_rate": 1.1787530104361786e-07, "loss": 0.2813, "step": 6593 }, { "epoch": 0.8822584961198823, "grad_norm": 33.215110778808594, "learning_rate": 1.1774150388011773e-07, "loss": 0.315, "step": 6594 }, { "epoch": 0.8823922932833824, "grad_norm": 41.200130462646484, "learning_rate": 1.176077067166176e-07, "loss": 0.3273, "step": 6595 }, { "epoch": 0.8825260904468826, "grad_norm": 45.415313720703125, "learning_rate": 1.1747390955311746e-07, "loss": 0.3607, "step": 6596 }, { "epoch": 0.8826598876103826, "grad_norm": 20.66087532043457, "learning_rate": 1.1734011238961733e-07, "loss": 0.1372, "step": 6597 }, { "epoch": 0.8827936847738828, "grad_norm": 30.10331153869629, "learning_rate": 1.172063152261172e-07, "loss": 0.2889, "step": 6598 }, { "epoch": 0.8829274819373829, "grad_norm": 51.63243865966797, "learning_rate": 1.1707251806261707e-07, "loss": 0.4578, "step": 6599 }, { "epoch": 0.8830612791008831, "grad_norm": 36.43925094604492, "learning_rate": 1.1693872089911694e-07, "loss": 0.3908, "step": 6600 }, { "epoch": 0.8831950762643832, "grad_norm": 44.561485290527344, "learning_rate": 1.168049237356168e-07, "loss": 0.2537, "step": 6601 }, { "epoch": 0.8833288734278834, "grad_norm": 40.191688537597656, "learning_rate": 1.1667112657211667e-07, "loss": 0.2908, "step": 6602 }, { "epoch": 0.8834626705913835, "grad_norm": 33.265899658203125, "learning_rate": 1.1653732940861654e-07, "loss": 0.2081, "step": 6603 }, { "epoch": 0.8835964677548835, "grad_norm": 39.213623046875, "learning_rate": 1.164035322451164e-07, "loss": 0.38, "step": 6604 }, { "epoch": 0.8837302649183837, "grad_norm": 26.286651611328125, "learning_rate": 1.1626973508161627e-07, "loss": 0.2701, "step": 6605 }, { "epoch": 0.8838640620818838, "grad_norm": 43.37248611450195, "learning_rate": 1.1613593791811612e-07, "loss": 0.3274, "step": 6606 }, { "epoch": 0.883997859245384, "grad_norm": 34.503883361816406, "learning_rate": 1.1600214075461599e-07, "loss": 0.3044, "step": 6607 }, { "epoch": 0.8841316564088841, "grad_norm": 36.00867462158203, "learning_rate": 1.1586834359111585e-07, "loss": 0.3358, "step": 6608 }, { "epoch": 0.8842654535723843, "grad_norm": 28.2138729095459, "learning_rate": 1.1573454642761572e-07, "loss": 0.3111, "step": 6609 }, { "epoch": 0.8843992507358844, "grad_norm": 47.16729736328125, "learning_rate": 1.156007492641156e-07, "loss": 0.2733, "step": 6610 }, { "epoch": 0.8845330478993846, "grad_norm": 39.59785461425781, "learning_rate": 1.1546695210061547e-07, "loss": 0.3009, "step": 6611 }, { "epoch": 0.8846668450628846, "grad_norm": 42.07240676879883, "learning_rate": 1.1533315493711533e-07, "loss": 0.4019, "step": 6612 }, { "epoch": 0.8848006422263848, "grad_norm": 32.89984893798828, "learning_rate": 1.151993577736152e-07, "loss": 0.2574, "step": 6613 }, { "epoch": 0.8849344393898849, "grad_norm": 34.980831146240234, "learning_rate": 1.1506556061011506e-07, "loss": 0.2956, "step": 6614 }, { "epoch": 0.8850682365533851, "grad_norm": 47.5089111328125, "learning_rate": 1.1493176344661493e-07, "loss": 0.3133, "step": 6615 }, { "epoch": 0.8852020337168852, "grad_norm": 39.528892517089844, "learning_rate": 1.147979662831148e-07, "loss": 0.3097, "step": 6616 }, { "epoch": 0.8853358308803854, "grad_norm": 60.950599670410156, "learning_rate": 1.1466416911961466e-07, "loss": 0.5779, "step": 6617 }, { "epoch": 0.8854696280438855, "grad_norm": 47.15777587890625, "learning_rate": 1.1453037195611451e-07, "loss": 0.4135, "step": 6618 }, { "epoch": 0.8856034252073856, "grad_norm": 54.119300842285156, "learning_rate": 1.1439657479261438e-07, "loss": 0.3825, "step": 6619 }, { "epoch": 0.8857372223708857, "grad_norm": 37.20511245727539, "learning_rate": 1.1426277762911425e-07, "loss": 0.252, "step": 6620 }, { "epoch": 0.8858710195343859, "grad_norm": 47.52375030517578, "learning_rate": 1.1412898046561413e-07, "loss": 0.4019, "step": 6621 }, { "epoch": 0.886004816697886, "grad_norm": 32.156436920166016, "learning_rate": 1.1399518330211399e-07, "loss": 0.2749, "step": 6622 }, { "epoch": 0.8861386138613861, "grad_norm": 62.43024826049805, "learning_rate": 1.1386138613861386e-07, "loss": 0.3994, "step": 6623 }, { "epoch": 0.8862724110248863, "grad_norm": 41.53732681274414, "learning_rate": 1.1372758897511372e-07, "loss": 0.5257, "step": 6624 }, { "epoch": 0.8864062081883864, "grad_norm": 35.10202407836914, "learning_rate": 1.1359379181161359e-07, "loss": 0.2641, "step": 6625 }, { "epoch": 0.8865400053518866, "grad_norm": 31.569538116455078, "learning_rate": 1.1345999464811346e-07, "loss": 0.484, "step": 6626 }, { "epoch": 0.8866738025153866, "grad_norm": 29.49180793762207, "learning_rate": 1.1332619748461332e-07, "loss": 0.2027, "step": 6627 }, { "epoch": 0.8868075996788868, "grad_norm": 14.842036247253418, "learning_rate": 1.1319240032111319e-07, "loss": 0.1119, "step": 6628 }, { "epoch": 0.8869413968423869, "grad_norm": 32.31303024291992, "learning_rate": 1.1305860315761305e-07, "loss": 0.2078, "step": 6629 }, { "epoch": 0.8870751940058871, "grad_norm": 28.6807861328125, "learning_rate": 1.1292480599411292e-07, "loss": 0.3081, "step": 6630 }, { "epoch": 0.8872089911693872, "grad_norm": 36.08230972290039, "learning_rate": 1.1279100883061277e-07, "loss": 0.3953, "step": 6631 }, { "epoch": 0.8873427883328874, "grad_norm": 24.067825317382812, "learning_rate": 1.1265721166711265e-07, "loss": 0.1653, "step": 6632 }, { "epoch": 0.8874765854963875, "grad_norm": 26.656261444091797, "learning_rate": 1.1252341450361252e-07, "loss": 0.1581, "step": 6633 }, { "epoch": 0.8876103826598876, "grad_norm": 39.890525817871094, "learning_rate": 1.1238961734011238e-07, "loss": 0.2147, "step": 6634 }, { "epoch": 0.8877441798233877, "grad_norm": 52.584564208984375, "learning_rate": 1.1225582017661225e-07, "loss": 0.3662, "step": 6635 }, { "epoch": 0.8878779769868879, "grad_norm": 27.412023544311523, "learning_rate": 1.1212202301311212e-07, "loss": 0.2448, "step": 6636 }, { "epoch": 0.888011774150388, "grad_norm": 25.446847915649414, "learning_rate": 1.1198822584961198e-07, "loss": 0.2196, "step": 6637 }, { "epoch": 0.8881455713138882, "grad_norm": 53.01782989501953, "learning_rate": 1.1185442868611185e-07, "loss": 0.5545, "step": 6638 }, { "epoch": 0.8882793684773883, "grad_norm": 26.98576545715332, "learning_rate": 1.1172063152261171e-07, "loss": 0.1329, "step": 6639 }, { "epoch": 0.8884131656408885, "grad_norm": 52.47455596923828, "learning_rate": 1.1158683435911158e-07, "loss": 0.3104, "step": 6640 }, { "epoch": 0.8885469628043885, "grad_norm": 39.22867965698242, "learning_rate": 1.1145303719561145e-07, "loss": 0.3888, "step": 6641 }, { "epoch": 0.8886807599678886, "grad_norm": 34.33979797363281, "learning_rate": 1.1131924003211131e-07, "loss": 0.2335, "step": 6642 }, { "epoch": 0.8888145571313888, "grad_norm": 66.18449401855469, "learning_rate": 1.1118544286861119e-07, "loss": 0.554, "step": 6643 }, { "epoch": 0.8889483542948889, "grad_norm": 29.177871704101562, "learning_rate": 1.1105164570511106e-07, "loss": 0.2106, "step": 6644 }, { "epoch": 0.8890821514583891, "grad_norm": 26.962862014770508, "learning_rate": 1.1091784854161091e-07, "loss": 0.3595, "step": 6645 }, { "epoch": 0.8892159486218892, "grad_norm": 45.715431213378906, "learning_rate": 1.1078405137811078e-07, "loss": 0.2909, "step": 6646 }, { "epoch": 0.8893497457853894, "grad_norm": 40.99060821533203, "learning_rate": 1.1065025421461064e-07, "loss": 0.2506, "step": 6647 }, { "epoch": 0.8894835429488895, "grad_norm": 32.44841003417969, "learning_rate": 1.1051645705111051e-07, "loss": 0.4412, "step": 6648 }, { "epoch": 0.8896173401123896, "grad_norm": 86.29796600341797, "learning_rate": 1.1038265988761038e-07, "loss": 0.6826, "step": 6649 }, { "epoch": 0.8897511372758897, "grad_norm": 48.71147918701172, "learning_rate": 1.1024886272411024e-07, "loss": 0.3992, "step": 6650 }, { "epoch": 0.8898849344393899, "grad_norm": 34.44331359863281, "learning_rate": 1.1011506556061011e-07, "loss": 0.2358, "step": 6651 }, { "epoch": 0.89001873160289, "grad_norm": 53.71746826171875, "learning_rate": 1.0998126839710997e-07, "loss": 0.3239, "step": 6652 }, { "epoch": 0.8901525287663902, "grad_norm": 51.67351150512695, "learning_rate": 1.0984747123360984e-07, "loss": 0.2754, "step": 6653 }, { "epoch": 0.8902863259298903, "grad_norm": 29.16888427734375, "learning_rate": 1.097136740701097e-07, "loss": 0.2741, "step": 6654 }, { "epoch": 0.8904201230933905, "grad_norm": 56.54344940185547, "learning_rate": 1.0957987690660958e-07, "loss": 0.3615, "step": 6655 }, { "epoch": 0.8905539202568905, "grad_norm": 38.518802642822266, "learning_rate": 1.0944607974310945e-07, "loss": 0.491, "step": 6656 }, { "epoch": 0.8906877174203907, "grad_norm": 46.958961486816406, "learning_rate": 1.0931228257960932e-07, "loss": 0.3621, "step": 6657 }, { "epoch": 0.8908215145838908, "grad_norm": 49.193389892578125, "learning_rate": 1.0917848541610917e-07, "loss": 0.2923, "step": 6658 }, { "epoch": 0.890955311747391, "grad_norm": 58.1423225402832, "learning_rate": 1.0904468825260904e-07, "loss": 0.6959, "step": 6659 }, { "epoch": 0.8910891089108911, "grad_norm": 67.53247833251953, "learning_rate": 1.089108910891089e-07, "loss": 0.3169, "step": 6660 }, { "epoch": 0.8912229060743913, "grad_norm": 35.80205535888672, "learning_rate": 1.0877709392560877e-07, "loss": 0.3073, "step": 6661 }, { "epoch": 0.8913567032378914, "grad_norm": 23.75294303894043, "learning_rate": 1.0864329676210863e-07, "loss": 0.157, "step": 6662 }, { "epoch": 0.8914905004013914, "grad_norm": 49.49934387207031, "learning_rate": 1.085094995986085e-07, "loss": 0.3538, "step": 6663 }, { "epoch": 0.8916242975648916, "grad_norm": 39.34025955200195, "learning_rate": 1.0837570243510837e-07, "loss": 0.2277, "step": 6664 }, { "epoch": 0.8917580947283917, "grad_norm": 38.19217300415039, "learning_rate": 1.0824190527160823e-07, "loss": 0.319, "step": 6665 }, { "epoch": 0.8918918918918919, "grad_norm": 52.3237190246582, "learning_rate": 1.0810810810810811e-07, "loss": 0.447, "step": 6666 }, { "epoch": 0.892025689055392, "grad_norm": 29.480844497680664, "learning_rate": 1.0797431094460798e-07, "loss": 0.2432, "step": 6667 }, { "epoch": 0.8921594862188922, "grad_norm": 33.72132110595703, "learning_rate": 1.0784051378110784e-07, "loss": 0.3915, "step": 6668 }, { "epoch": 0.8922932833823923, "grad_norm": 49.08456039428711, "learning_rate": 1.0770671661760771e-07, "loss": 0.4051, "step": 6669 }, { "epoch": 0.8924270805458925, "grad_norm": 38.736148834228516, "learning_rate": 1.0757291945410758e-07, "loss": 0.3001, "step": 6670 }, { "epoch": 0.8925608777093925, "grad_norm": 40.42055892944336, "learning_rate": 1.0743912229060743e-07, "loss": 0.1754, "step": 6671 }, { "epoch": 0.8926946748728927, "grad_norm": 23.144636154174805, "learning_rate": 1.073053251271073e-07, "loss": 0.2073, "step": 6672 }, { "epoch": 0.8928284720363928, "grad_norm": 34.88660430908203, "learning_rate": 1.0717152796360716e-07, "loss": 0.393, "step": 6673 }, { "epoch": 0.892962269199893, "grad_norm": 47.00102233886719, "learning_rate": 1.0703773080010703e-07, "loss": 0.4959, "step": 6674 }, { "epoch": 0.8930960663633931, "grad_norm": 39.74624252319336, "learning_rate": 1.0690393363660689e-07, "loss": 0.2842, "step": 6675 }, { "epoch": 0.8932298635268933, "grad_norm": 60.84891891479492, "learning_rate": 1.0677013647310676e-07, "loss": 0.4394, "step": 6676 }, { "epoch": 0.8933636606903934, "grad_norm": 39.69636154174805, "learning_rate": 1.0663633930960664e-07, "loss": 0.2753, "step": 6677 }, { "epoch": 0.8934974578538935, "grad_norm": 36.86738204956055, "learning_rate": 1.065025421461065e-07, "loss": 0.2388, "step": 6678 }, { "epoch": 0.8936312550173936, "grad_norm": 44.20252990722656, "learning_rate": 1.0636874498260637e-07, "loss": 0.279, "step": 6679 }, { "epoch": 0.8937650521808937, "grad_norm": 37.96697998046875, "learning_rate": 1.0623494781910624e-07, "loss": 0.3603, "step": 6680 }, { "epoch": 0.8938988493443939, "grad_norm": 35.978702545166016, "learning_rate": 1.061011506556061e-07, "loss": 0.3217, "step": 6681 }, { "epoch": 0.894032646507894, "grad_norm": 33.42068099975586, "learning_rate": 1.0596735349210597e-07, "loss": 0.259, "step": 6682 }, { "epoch": 0.8941664436713942, "grad_norm": 42.75206756591797, "learning_rate": 1.0583355632860583e-07, "loss": 0.3638, "step": 6683 }, { "epoch": 0.8943002408348943, "grad_norm": 34.81935501098633, "learning_rate": 1.0569975916510569e-07, "loss": 0.3874, "step": 6684 }, { "epoch": 0.8944340379983944, "grad_norm": 40.49744415283203, "learning_rate": 1.0556596200160555e-07, "loss": 0.2828, "step": 6685 }, { "epoch": 0.8945678351618945, "grad_norm": 35.55107498168945, "learning_rate": 1.0543216483810542e-07, "loss": 0.2881, "step": 6686 }, { "epoch": 0.8947016323253947, "grad_norm": 41.165531158447266, "learning_rate": 1.0529836767460528e-07, "loss": 0.3256, "step": 6687 }, { "epoch": 0.8948354294888948, "grad_norm": 43.92138671875, "learning_rate": 1.0516457051110516e-07, "loss": 0.3987, "step": 6688 }, { "epoch": 0.894969226652395, "grad_norm": 50.200157165527344, "learning_rate": 1.0503077334760503e-07, "loss": 0.3641, "step": 6689 }, { "epoch": 0.8951030238158951, "grad_norm": 42.23887634277344, "learning_rate": 1.048969761841049e-07, "loss": 0.3273, "step": 6690 }, { "epoch": 0.8952368209793953, "grad_norm": 29.52501678466797, "learning_rate": 1.0476317902060476e-07, "loss": 0.141, "step": 6691 }, { "epoch": 0.8953706181428954, "grad_norm": 30.398277282714844, "learning_rate": 1.0462938185710463e-07, "loss": 0.2581, "step": 6692 }, { "epoch": 0.8955044153063955, "grad_norm": 52.3032112121582, "learning_rate": 1.044955846936045e-07, "loss": 0.5078, "step": 6693 }, { "epoch": 0.8956382124698956, "grad_norm": 28.67167091369629, "learning_rate": 1.0436178753010436e-07, "loss": 0.1683, "step": 6694 }, { "epoch": 0.8957720096333958, "grad_norm": 63.740692138671875, "learning_rate": 1.0422799036660423e-07, "loss": 0.4512, "step": 6695 }, { "epoch": 0.8959058067968959, "grad_norm": 34.5290412902832, "learning_rate": 1.0409419320310409e-07, "loss": 0.2087, "step": 6696 }, { "epoch": 0.8960396039603961, "grad_norm": 44.92140579223633, "learning_rate": 1.0396039603960394e-07, "loss": 0.4995, "step": 6697 }, { "epoch": 0.8961734011238962, "grad_norm": 40.384525299072266, "learning_rate": 1.0382659887610381e-07, "loss": 0.3322, "step": 6698 }, { "epoch": 0.8963071982873964, "grad_norm": 51.79157638549805, "learning_rate": 1.0369280171260369e-07, "loss": 0.2545, "step": 6699 }, { "epoch": 0.8964409954508964, "grad_norm": 35.62885665893555, "learning_rate": 1.0355900454910356e-07, "loss": 0.3315, "step": 6700 }, { "epoch": 0.8965747926143965, "grad_norm": 56.500980377197266, "learning_rate": 1.0342520738560342e-07, "loss": 0.5018, "step": 6701 }, { "epoch": 0.8967085897778967, "grad_norm": 44.16324234008789, "learning_rate": 1.0329141022210329e-07, "loss": 0.3456, "step": 6702 }, { "epoch": 0.8968423869413968, "grad_norm": 25.08957290649414, "learning_rate": 1.0315761305860315e-07, "loss": 0.2323, "step": 6703 }, { "epoch": 0.896976184104897, "grad_norm": 51.88188552856445, "learning_rate": 1.0302381589510302e-07, "loss": 0.3047, "step": 6704 }, { "epoch": 0.8971099812683971, "grad_norm": 45.251564025878906, "learning_rate": 1.0289001873160289e-07, "loss": 0.4682, "step": 6705 }, { "epoch": 0.8972437784318973, "grad_norm": 60.893043518066406, "learning_rate": 1.0275622156810275e-07, "loss": 0.4069, "step": 6706 }, { "epoch": 0.8973775755953973, "grad_norm": 37.60619354248047, "learning_rate": 1.0262242440460262e-07, "loss": 0.2721, "step": 6707 }, { "epoch": 0.8975113727588975, "grad_norm": 27.49346351623535, "learning_rate": 1.0248862724110248e-07, "loss": 0.2211, "step": 6708 }, { "epoch": 0.8976451699223976, "grad_norm": 49.95216369628906, "learning_rate": 1.0235483007760235e-07, "loss": 0.4486, "step": 6709 }, { "epoch": 0.8977789670858978, "grad_norm": 37.21920394897461, "learning_rate": 1.0222103291410222e-07, "loss": 0.2395, "step": 6710 }, { "epoch": 0.8979127642493979, "grad_norm": 40.20843505859375, "learning_rate": 1.0208723575060208e-07, "loss": 0.212, "step": 6711 }, { "epoch": 0.8980465614128981, "grad_norm": 37.23338317871094, "learning_rate": 1.0195343858710195e-07, "loss": 0.315, "step": 6712 }, { "epoch": 0.8981803585763982, "grad_norm": 38.98509979248047, "learning_rate": 1.0181964142360181e-07, "loss": 0.1889, "step": 6713 }, { "epoch": 0.8983141557398984, "grad_norm": 38.81325912475586, "learning_rate": 1.0168584426010168e-07, "loss": 0.2967, "step": 6714 }, { "epoch": 0.8984479529033984, "grad_norm": 51.13774871826172, "learning_rate": 1.0155204709660155e-07, "loss": 0.2475, "step": 6715 }, { "epoch": 0.8985817500668986, "grad_norm": 55.84059524536133, "learning_rate": 1.0141824993310141e-07, "loss": 0.3835, "step": 6716 }, { "epoch": 0.8987155472303987, "grad_norm": 55.15458679199219, "learning_rate": 1.0128445276960128e-07, "loss": 0.5633, "step": 6717 }, { "epoch": 0.8988493443938989, "grad_norm": 60.157012939453125, "learning_rate": 1.0115065560610114e-07, "loss": 0.3331, "step": 6718 }, { "epoch": 0.898983141557399, "grad_norm": 60.38639450073242, "learning_rate": 1.0101685844260101e-07, "loss": 0.5865, "step": 6719 }, { "epoch": 0.8991169387208992, "grad_norm": 31.868772506713867, "learning_rate": 1.0088306127910088e-07, "loss": 0.3328, "step": 6720 }, { "epoch": 0.8992507358843993, "grad_norm": 48.18416213989258, "learning_rate": 1.0074926411560074e-07, "loss": 0.4327, "step": 6721 }, { "epoch": 0.8993845330478993, "grad_norm": 26.922420501708984, "learning_rate": 1.0061546695210062e-07, "loss": 0.1751, "step": 6722 }, { "epoch": 0.8995183302113995, "grad_norm": 37.8258171081543, "learning_rate": 1.0048166978860048e-07, "loss": 0.3128, "step": 6723 }, { "epoch": 0.8996521273748996, "grad_norm": 56.49819564819336, "learning_rate": 1.0034787262510034e-07, "loss": 0.4326, "step": 6724 }, { "epoch": 0.8997859245383998, "grad_norm": 28.856151580810547, "learning_rate": 1.0021407546160021e-07, "loss": 0.1914, "step": 6725 }, { "epoch": 0.8999197217018999, "grad_norm": 44.85447311401367, "learning_rate": 1.0008027829810007e-07, "loss": 0.3237, "step": 6726 }, { "epoch": 0.9000535188654001, "grad_norm": 37.533390045166016, "learning_rate": 9.994648113459994e-08, "loss": 0.4772, "step": 6727 }, { "epoch": 0.9001873160289002, "grad_norm": 45.80540466308594, "learning_rate": 9.98126839710998e-08, "loss": 0.355, "step": 6728 }, { "epoch": 0.9003211131924003, "grad_norm": 31.0173282623291, "learning_rate": 9.967888680759967e-08, "loss": 0.3439, "step": 6729 }, { "epoch": 0.9004549103559004, "grad_norm": 72.26617431640625, "learning_rate": 9.954508964409954e-08, "loss": 0.5018, "step": 6730 }, { "epoch": 0.9005887075194006, "grad_norm": 25.803457260131836, "learning_rate": 9.94112924805994e-08, "loss": 0.2323, "step": 6731 }, { "epoch": 0.9007225046829007, "grad_norm": 24.644742965698242, "learning_rate": 9.927749531709927e-08, "loss": 0.2396, "step": 6732 }, { "epoch": 0.9008563018464009, "grad_norm": 64.48645782470703, "learning_rate": 9.914369815359915e-08, "loss": 0.3192, "step": 6733 }, { "epoch": 0.900990099009901, "grad_norm": 44.347286224365234, "learning_rate": 9.900990099009901e-08, "loss": 0.4985, "step": 6734 }, { "epoch": 0.9011238961734012, "grad_norm": 36.416683197021484, "learning_rate": 9.887610382659888e-08, "loss": 0.2333, "step": 6735 }, { "epoch": 0.9012576933369013, "grad_norm": 32.03694152832031, "learning_rate": 9.874230666309873e-08, "loss": 0.1874, "step": 6736 }, { "epoch": 0.9013914905004013, "grad_norm": 43.46795654296875, "learning_rate": 9.86085094995986e-08, "loss": 0.4207, "step": 6737 }, { "epoch": 0.9015252876639015, "grad_norm": 43.977664947509766, "learning_rate": 9.847471233609847e-08, "loss": 0.333, "step": 6738 }, { "epoch": 0.9016590848274016, "grad_norm": 29.27896499633789, "learning_rate": 9.834091517259833e-08, "loss": 0.2359, "step": 6739 }, { "epoch": 0.9017928819909018, "grad_norm": 34.18517303466797, "learning_rate": 9.82071180090982e-08, "loss": 0.314, "step": 6740 }, { "epoch": 0.901926679154402, "grad_norm": 34.5658073425293, "learning_rate": 9.807332084559806e-08, "loss": 0.3187, "step": 6741 }, { "epoch": 0.9020604763179021, "grad_norm": 46.68551254272461, "learning_rate": 9.793952368209793e-08, "loss": 0.2747, "step": 6742 }, { "epoch": 0.9021942734814022, "grad_norm": 37.1757926940918, "learning_rate": 9.78057265185978e-08, "loss": 0.31, "step": 6743 }, { "epoch": 0.9023280706449023, "grad_norm": 52.50199890136719, "learning_rate": 9.767192935509768e-08, "loss": 0.3182, "step": 6744 }, { "epoch": 0.9024618678084024, "grad_norm": 53.07481002807617, "learning_rate": 9.753813219159754e-08, "loss": 0.3797, "step": 6745 }, { "epoch": 0.9025956649719026, "grad_norm": 48.63065719604492, "learning_rate": 9.740433502809741e-08, "loss": 0.4357, "step": 6746 }, { "epoch": 0.9027294621354027, "grad_norm": 80.56129455566406, "learning_rate": 9.727053786459727e-08, "loss": 0.3988, "step": 6747 }, { "epoch": 0.9028632592989029, "grad_norm": 38.3481559753418, "learning_rate": 9.713674070109714e-08, "loss": 0.2145, "step": 6748 }, { "epoch": 0.902997056462403, "grad_norm": 79.31440734863281, "learning_rate": 9.700294353759699e-08, "loss": 0.291, "step": 6749 }, { "epoch": 0.9031308536259032, "grad_norm": 31.85471534729004, "learning_rate": 9.686914637409686e-08, "loss": 0.2313, "step": 6750 }, { "epoch": 0.9032646507894032, "grad_norm": 57.40138244628906, "learning_rate": 9.673534921059672e-08, "loss": 0.3796, "step": 6751 }, { "epoch": 0.9033984479529034, "grad_norm": 35.99618148803711, "learning_rate": 9.660155204709659e-08, "loss": 0.4201, "step": 6752 }, { "epoch": 0.9035322451164035, "grad_norm": 35.63972473144531, "learning_rate": 9.646775488359646e-08, "loss": 0.2308, "step": 6753 }, { "epoch": 0.9036660422799037, "grad_norm": 50.80629348754883, "learning_rate": 9.633395772009632e-08, "loss": 0.463, "step": 6754 }, { "epoch": 0.9037998394434038, "grad_norm": 64.13351440429688, "learning_rate": 9.62001605565962e-08, "loss": 0.489, "step": 6755 }, { "epoch": 0.903933636606904, "grad_norm": 55.906517028808594, "learning_rate": 9.606636339309607e-08, "loss": 0.4636, "step": 6756 }, { "epoch": 0.9040674337704041, "grad_norm": 64.41140747070312, "learning_rate": 9.593256622959593e-08, "loss": 0.4654, "step": 6757 }, { "epoch": 0.9042012309339043, "grad_norm": 46.79865264892578, "learning_rate": 9.57987690660958e-08, "loss": 0.3161, "step": 6758 }, { "epoch": 0.9043350280974043, "grad_norm": 41.53817367553711, "learning_rate": 9.566497190259567e-08, "loss": 0.2696, "step": 6759 }, { "epoch": 0.9044688252609044, "grad_norm": 43.84819793701172, "learning_rate": 9.553117473909553e-08, "loss": 0.2994, "step": 6760 }, { "epoch": 0.9046026224244046, "grad_norm": 45.91488265991211, "learning_rate": 9.53973775755954e-08, "loss": 0.4794, "step": 6761 }, { "epoch": 0.9047364195879047, "grad_norm": 42.27482604980469, "learning_rate": 9.526358041209525e-08, "loss": 0.267, "step": 6762 }, { "epoch": 0.9048702167514049, "grad_norm": 47.99414825439453, "learning_rate": 9.512978324859512e-08, "loss": 0.3117, "step": 6763 }, { "epoch": 0.905004013914905, "grad_norm": 47.20528030395508, "learning_rate": 9.499598608509498e-08, "loss": 0.4826, "step": 6764 }, { "epoch": 0.9051378110784052, "grad_norm": 32.64999008178711, "learning_rate": 9.486218892159485e-08, "loss": 0.1544, "step": 6765 }, { "epoch": 0.9052716082419052, "grad_norm": 41.19649887084961, "learning_rate": 9.472839175809473e-08, "loss": 0.3139, "step": 6766 }, { "epoch": 0.9054054054054054, "grad_norm": 42.99637985229492, "learning_rate": 9.45945945945946e-08, "loss": 0.3307, "step": 6767 }, { "epoch": 0.9055392025689055, "grad_norm": 40.90684509277344, "learning_rate": 9.446079743109446e-08, "loss": 0.3917, "step": 6768 }, { "epoch": 0.9056729997324057, "grad_norm": 28.384469985961914, "learning_rate": 9.432700026759433e-08, "loss": 0.2815, "step": 6769 }, { "epoch": 0.9058067968959058, "grad_norm": 22.45903778076172, "learning_rate": 9.419320310409419e-08, "loss": 0.1678, "step": 6770 }, { "epoch": 0.905940594059406, "grad_norm": 40.410316467285156, "learning_rate": 9.405940594059406e-08, "loss": 0.333, "step": 6771 }, { "epoch": 0.9060743912229061, "grad_norm": 62.03949737548828, "learning_rate": 9.392560877709392e-08, "loss": 0.2976, "step": 6772 }, { "epoch": 0.9062081883864062, "grad_norm": 58.67509078979492, "learning_rate": 9.379181161359379e-08, "loss": 0.462, "step": 6773 }, { "epoch": 0.9063419855499063, "grad_norm": 35.02092361450195, "learning_rate": 9.365801445009366e-08, "loss": 0.3348, "step": 6774 }, { "epoch": 0.9064757827134065, "grad_norm": 39.66336441040039, "learning_rate": 9.352421728659351e-08, "loss": 0.3888, "step": 6775 }, { "epoch": 0.9066095798769066, "grad_norm": 35.43662643432617, "learning_rate": 9.339042012309337e-08, "loss": 0.3924, "step": 6776 }, { "epoch": 0.9067433770404068, "grad_norm": 87.93084716796875, "learning_rate": 9.325662295959325e-08, "loss": 0.7113, "step": 6777 }, { "epoch": 0.9068771742039069, "grad_norm": 58.95281219482422, "learning_rate": 9.312282579609312e-08, "loss": 0.4835, "step": 6778 }, { "epoch": 0.907010971367407, "grad_norm": 52.735626220703125, "learning_rate": 9.298902863259299e-08, "loss": 0.3612, "step": 6779 }, { "epoch": 0.9071447685309072, "grad_norm": 72.49583435058594, "learning_rate": 9.285523146909285e-08, "loss": 0.5299, "step": 6780 }, { "epoch": 0.9072785656944072, "grad_norm": 39.72298049926758, "learning_rate": 9.272143430559272e-08, "loss": 0.4243, "step": 6781 }, { "epoch": 0.9074123628579074, "grad_norm": 53.85466384887695, "learning_rate": 9.258763714209258e-08, "loss": 0.3314, "step": 6782 }, { "epoch": 0.9075461600214075, "grad_norm": 39.24352264404297, "learning_rate": 9.245383997859245e-08, "loss": 0.3568, "step": 6783 }, { "epoch": 0.9076799571849077, "grad_norm": 16.667917251586914, "learning_rate": 9.232004281509232e-08, "loss": 0.115, "step": 6784 }, { "epoch": 0.9078137543484078, "grad_norm": 48.82355499267578, "learning_rate": 9.218624565159218e-08, "loss": 0.4187, "step": 6785 }, { "epoch": 0.907947551511908, "grad_norm": 73.1860580444336, "learning_rate": 9.205244848809205e-08, "loss": 0.8639, "step": 6786 }, { "epoch": 0.9080813486754081, "grad_norm": 38.58263397216797, "learning_rate": 9.191865132459191e-08, "loss": 0.2289, "step": 6787 }, { "epoch": 0.9082151458389082, "grad_norm": 28.693788528442383, "learning_rate": 9.17848541610918e-08, "loss": 0.3238, "step": 6788 }, { "epoch": 0.9083489430024083, "grad_norm": 36.77587890625, "learning_rate": 9.165105699759165e-08, "loss": 0.2716, "step": 6789 }, { "epoch": 0.9084827401659085, "grad_norm": 16.86024284362793, "learning_rate": 9.151725983409151e-08, "loss": 0.1777, "step": 6790 }, { "epoch": 0.9086165373294086, "grad_norm": 35.544342041015625, "learning_rate": 9.138346267059138e-08, "loss": 0.407, "step": 6791 }, { "epoch": 0.9087503344929088, "grad_norm": 43.051876068115234, "learning_rate": 9.124966550709124e-08, "loss": 0.3716, "step": 6792 }, { "epoch": 0.9088841316564089, "grad_norm": 40.17816925048828, "learning_rate": 9.111586834359111e-08, "loss": 0.3881, "step": 6793 }, { "epoch": 0.9090179288199091, "grad_norm": 39.49322509765625, "learning_rate": 9.098207118009098e-08, "loss": 0.3521, "step": 6794 }, { "epoch": 0.9091517259834091, "grad_norm": 55.0706672668457, "learning_rate": 9.084827401659084e-08, "loss": 0.4265, "step": 6795 }, { "epoch": 0.9092855231469092, "grad_norm": 52.91558837890625, "learning_rate": 9.071447685309071e-08, "loss": 0.3602, "step": 6796 }, { "epoch": 0.9094193203104094, "grad_norm": 35.698333740234375, "learning_rate": 9.058067968959057e-08, "loss": 0.3034, "step": 6797 }, { "epoch": 0.9095531174739095, "grad_norm": 25.804182052612305, "learning_rate": 9.044688252609044e-08, "loss": 0.3087, "step": 6798 }, { "epoch": 0.9096869146374097, "grad_norm": 77.6063232421875, "learning_rate": 9.031308536259031e-08, "loss": 0.4224, "step": 6799 }, { "epoch": 0.9098207118009098, "grad_norm": 52.88320541381836, "learning_rate": 9.017928819909019e-08, "loss": 0.4748, "step": 6800 }, { "epoch": 0.90995450896441, "grad_norm": 46.63367462158203, "learning_rate": 9.004549103559005e-08, "loss": 0.2798, "step": 6801 }, { "epoch": 0.9100883061279101, "grad_norm": 42.1827392578125, "learning_rate": 8.99116938720899e-08, "loss": 0.3851, "step": 6802 }, { "epoch": 0.9102221032914102, "grad_norm": 58.173240661621094, "learning_rate": 8.977789670858977e-08, "loss": 0.3557, "step": 6803 }, { "epoch": 0.9103559004549103, "grad_norm": 37.18952178955078, "learning_rate": 8.964409954508964e-08, "loss": 0.2397, "step": 6804 }, { "epoch": 0.9104896976184105, "grad_norm": 30.85306739807129, "learning_rate": 8.95103023815895e-08, "loss": 0.3943, "step": 6805 }, { "epoch": 0.9106234947819106, "grad_norm": 28.030492782592773, "learning_rate": 8.937650521808937e-08, "loss": 0.3216, "step": 6806 }, { "epoch": 0.9107572919454108, "grad_norm": 37.97477722167969, "learning_rate": 8.924270805458924e-08, "loss": 0.2455, "step": 6807 }, { "epoch": 0.9108910891089109, "grad_norm": 28.880817413330078, "learning_rate": 8.91089108910891e-08, "loss": 0.2641, "step": 6808 }, { "epoch": 0.9110248862724111, "grad_norm": 36.117950439453125, "learning_rate": 8.897511372758897e-08, "loss": 0.2992, "step": 6809 }, { "epoch": 0.9111586834359111, "grad_norm": 35.852195739746094, "learning_rate": 8.884131656408883e-08, "loss": 0.1851, "step": 6810 }, { "epoch": 0.9112924805994113, "grad_norm": 59.27960205078125, "learning_rate": 8.870751940058871e-08, "loss": 0.3943, "step": 6811 }, { "epoch": 0.9114262777629114, "grad_norm": 42.469032287597656, "learning_rate": 8.857372223708858e-08, "loss": 0.3788, "step": 6812 }, { "epoch": 0.9115600749264116, "grad_norm": 47.309303283691406, "learning_rate": 8.843992507358844e-08, "loss": 0.4977, "step": 6813 }, { "epoch": 0.9116938720899117, "grad_norm": 20.096025466918945, "learning_rate": 8.830612791008831e-08, "loss": 0.0902, "step": 6814 }, { "epoch": 0.9118276692534119, "grad_norm": 45.50159454345703, "learning_rate": 8.817233074658816e-08, "loss": 0.4029, "step": 6815 }, { "epoch": 0.911961466416912, "grad_norm": 36.73318099975586, "learning_rate": 8.803853358308803e-08, "loss": 0.2735, "step": 6816 }, { "epoch": 0.912095263580412, "grad_norm": 43.839595794677734, "learning_rate": 8.79047364195879e-08, "loss": 0.3264, "step": 6817 }, { "epoch": 0.9122290607439122, "grad_norm": 40.21220397949219, "learning_rate": 8.777093925608776e-08, "loss": 0.3045, "step": 6818 }, { "epoch": 0.9123628579074123, "grad_norm": 24.73957633972168, "learning_rate": 8.763714209258763e-08, "loss": 0.3111, "step": 6819 }, { "epoch": 0.9124966550709125, "grad_norm": 44.43389129638672, "learning_rate": 8.75033449290875e-08, "loss": 0.1677, "step": 6820 }, { "epoch": 0.9126304522344126, "grad_norm": 30.960277557373047, "learning_rate": 8.736954776558736e-08, "loss": 0.1885, "step": 6821 }, { "epoch": 0.9127642493979128, "grad_norm": 68.28404235839844, "learning_rate": 8.723575060208724e-08, "loss": 0.5545, "step": 6822 }, { "epoch": 0.9128980465614129, "grad_norm": 65.16172790527344, "learning_rate": 8.71019534385871e-08, "loss": 0.7323, "step": 6823 }, { "epoch": 0.9130318437249131, "grad_norm": 80.66436004638672, "learning_rate": 8.696815627508697e-08, "loss": 0.6391, "step": 6824 }, { "epoch": 0.9131656408884131, "grad_norm": 29.44377326965332, "learning_rate": 8.683435911158684e-08, "loss": 0.2747, "step": 6825 }, { "epoch": 0.9132994380519133, "grad_norm": 41.211238861083984, "learning_rate": 8.67005619480867e-08, "loss": 0.4213, "step": 6826 }, { "epoch": 0.9134332352154134, "grad_norm": 46.34832763671875, "learning_rate": 8.656676478458657e-08, "loss": 0.4453, "step": 6827 }, { "epoch": 0.9135670323789136, "grad_norm": 55.36542510986328, "learning_rate": 8.643296762108642e-08, "loss": 0.4502, "step": 6828 }, { "epoch": 0.9137008295424137, "grad_norm": 37.59425354003906, "learning_rate": 8.629917045758629e-08, "loss": 0.3558, "step": 6829 }, { "epoch": 0.9138346267059139, "grad_norm": 50.66817092895508, "learning_rate": 8.616537329408615e-08, "loss": 0.4486, "step": 6830 }, { "epoch": 0.913968423869414, "grad_norm": 57.28788757324219, "learning_rate": 8.603157613058602e-08, "loss": 0.4896, "step": 6831 }, { "epoch": 0.914102221032914, "grad_norm": 18.535003662109375, "learning_rate": 8.589777896708589e-08, "loss": 0.1477, "step": 6832 }, { "epoch": 0.9142360181964142, "grad_norm": 31.112659454345703, "learning_rate": 8.576398180358577e-08, "loss": 0.1836, "step": 6833 }, { "epoch": 0.9143698153599144, "grad_norm": 31.71688461303711, "learning_rate": 8.563018464008563e-08, "loss": 0.2627, "step": 6834 }, { "epoch": 0.9145036125234145, "grad_norm": 49.6506462097168, "learning_rate": 8.54963874765855e-08, "loss": 0.372, "step": 6835 }, { "epoch": 0.9146374096869146, "grad_norm": 34.61507797241211, "learning_rate": 8.536259031308536e-08, "loss": 0.2415, "step": 6836 }, { "epoch": 0.9147712068504148, "grad_norm": 50.446319580078125, "learning_rate": 8.522879314958523e-08, "loss": 0.4015, "step": 6837 }, { "epoch": 0.914905004013915, "grad_norm": 76.69453430175781, "learning_rate": 8.50949959860851e-08, "loss": 0.6053, "step": 6838 }, { "epoch": 0.915038801177415, "grad_norm": 73.16879272460938, "learning_rate": 8.496119882258496e-08, "loss": 0.5331, "step": 6839 }, { "epoch": 0.9151725983409151, "grad_norm": 37.058074951171875, "learning_rate": 8.482740165908483e-08, "loss": 0.3503, "step": 6840 }, { "epoch": 0.9153063955044153, "grad_norm": 63.24678039550781, "learning_rate": 8.469360449558468e-08, "loss": 0.6567, "step": 6841 }, { "epoch": 0.9154401926679154, "grad_norm": 33.57240676879883, "learning_rate": 8.455980733208455e-08, "loss": 0.2035, "step": 6842 }, { "epoch": 0.9155739898314156, "grad_norm": 53.170684814453125, "learning_rate": 8.442601016858441e-08, "loss": 0.5191, "step": 6843 }, { "epoch": 0.9157077869949157, "grad_norm": 41.64584732055664, "learning_rate": 8.429221300508429e-08, "loss": 0.4354, "step": 6844 }, { "epoch": 0.9158415841584159, "grad_norm": 47.23472213745117, "learning_rate": 8.415841584158416e-08, "loss": 0.2367, "step": 6845 }, { "epoch": 0.915975381321916, "grad_norm": 40.75728225708008, "learning_rate": 8.402461867808402e-08, "loss": 0.3564, "step": 6846 }, { "epoch": 0.9161091784854161, "grad_norm": 36.644256591796875, "learning_rate": 8.389082151458389e-08, "loss": 0.3166, "step": 6847 }, { "epoch": 0.9162429756489162, "grad_norm": 56.39495086669922, "learning_rate": 8.375702435108376e-08, "loss": 0.6251, "step": 6848 }, { "epoch": 0.9163767728124164, "grad_norm": 35.7569465637207, "learning_rate": 8.362322718758362e-08, "loss": 0.3259, "step": 6849 }, { "epoch": 0.9165105699759165, "grad_norm": 46.03322219848633, "learning_rate": 8.348943002408349e-08, "loss": 0.2809, "step": 6850 }, { "epoch": 0.9166443671394167, "grad_norm": 31.291343688964844, "learning_rate": 8.335563286058335e-08, "loss": 0.2135, "step": 6851 }, { "epoch": 0.9167781643029168, "grad_norm": 72.48059844970703, "learning_rate": 8.322183569708322e-08, "loss": 0.652, "step": 6852 }, { "epoch": 0.916911961466417, "grad_norm": 27.674348831176758, "learning_rate": 8.308803853358309e-08, "loss": 0.3547, "step": 6853 }, { "epoch": 0.917045758629917, "grad_norm": 48.52094268798828, "learning_rate": 8.295424137008294e-08, "loss": 0.5426, "step": 6854 }, { "epoch": 0.9171795557934171, "grad_norm": 31.38462257385254, "learning_rate": 8.282044420658282e-08, "loss": 0.3013, "step": 6855 }, { "epoch": 0.9173133529569173, "grad_norm": 44.83040237426758, "learning_rate": 8.268664704308268e-08, "loss": 0.4256, "step": 6856 }, { "epoch": 0.9174471501204174, "grad_norm": 40.716094970703125, "learning_rate": 8.255284987958255e-08, "loss": 0.3466, "step": 6857 }, { "epoch": 0.9175809472839176, "grad_norm": 37.67740249633789, "learning_rate": 8.241905271608242e-08, "loss": 0.2922, "step": 6858 }, { "epoch": 0.9177147444474177, "grad_norm": 34.621341705322266, "learning_rate": 8.228525555258228e-08, "loss": 0.2625, "step": 6859 }, { "epoch": 0.9178485416109179, "grad_norm": 41.562198638916016, "learning_rate": 8.215145838908215e-08, "loss": 0.3945, "step": 6860 }, { "epoch": 0.9179823387744179, "grad_norm": 35.76835632324219, "learning_rate": 8.201766122558201e-08, "loss": 0.4051, "step": 6861 }, { "epoch": 0.9181161359379181, "grad_norm": 52.35306167602539, "learning_rate": 8.188386406208188e-08, "loss": 0.4065, "step": 6862 }, { "epoch": 0.9182499331014182, "grad_norm": 57.068824768066406, "learning_rate": 8.175006689858175e-08, "loss": 0.4501, "step": 6863 }, { "epoch": 0.9183837302649184, "grad_norm": 60.444881439208984, "learning_rate": 8.161626973508161e-08, "loss": 0.231, "step": 6864 }, { "epoch": 0.9185175274284185, "grad_norm": 39.45391845703125, "learning_rate": 8.148247257158148e-08, "loss": 0.3631, "step": 6865 }, { "epoch": 0.9186513245919187, "grad_norm": 54.60417175292969, "learning_rate": 8.134867540808134e-08, "loss": 0.4782, "step": 6866 }, { "epoch": 0.9187851217554188, "grad_norm": 36.06803894042969, "learning_rate": 8.121487824458121e-08, "loss": 0.2731, "step": 6867 }, { "epoch": 0.918918918918919, "grad_norm": 42.307403564453125, "learning_rate": 8.108108108108108e-08, "loss": 0.2936, "step": 6868 }, { "epoch": 0.919052716082419, "grad_norm": 60.505393981933594, "learning_rate": 8.094728391758094e-08, "loss": 0.5151, "step": 6869 }, { "epoch": 0.9191865132459192, "grad_norm": 42.08151626586914, "learning_rate": 8.081348675408081e-08, "loss": 0.4747, "step": 6870 }, { "epoch": 0.9193203104094193, "grad_norm": 67.05402374267578, "learning_rate": 8.067968959058067e-08, "loss": 0.3509, "step": 6871 }, { "epoch": 0.9194541075729195, "grad_norm": 31.234479904174805, "learning_rate": 8.054589242708054e-08, "loss": 0.3581, "step": 6872 }, { "epoch": 0.9195879047364196, "grad_norm": 51.12786102294922, "learning_rate": 8.041209526358041e-08, "loss": 0.4223, "step": 6873 }, { "epoch": 0.9197217018999198, "grad_norm": 27.604101181030273, "learning_rate": 8.027829810008027e-08, "loss": 0.211, "step": 6874 }, { "epoch": 0.9198554990634199, "grad_norm": 20.83241844177246, "learning_rate": 8.014450093658014e-08, "loss": 0.2251, "step": 6875 }, { "epoch": 0.9199892962269199, "grad_norm": 41.070770263671875, "learning_rate": 8.001070377308e-08, "loss": 0.3213, "step": 6876 }, { "epoch": 0.9201230933904201, "grad_norm": 44.006717681884766, "learning_rate": 7.987690660957987e-08, "loss": 0.3545, "step": 6877 }, { "epoch": 0.9202568905539202, "grad_norm": 33.8167724609375, "learning_rate": 7.974310944607975e-08, "loss": 0.2305, "step": 6878 }, { "epoch": 0.9203906877174204, "grad_norm": 46.28071594238281, "learning_rate": 7.960931228257962e-08, "loss": 0.4592, "step": 6879 }, { "epoch": 0.9205244848809205, "grad_norm": 41.43735122680664, "learning_rate": 7.947551511907947e-08, "loss": 0.337, "step": 6880 }, { "epoch": 0.9206582820444207, "grad_norm": 65.74829864501953, "learning_rate": 7.934171795557934e-08, "loss": 0.4239, "step": 6881 }, { "epoch": 0.9207920792079208, "grad_norm": 48.25659942626953, "learning_rate": 7.92079207920792e-08, "loss": 0.2616, "step": 6882 }, { "epoch": 0.9209258763714209, "grad_norm": 44.39081954956055, "learning_rate": 7.907412362857907e-08, "loss": 0.1912, "step": 6883 }, { "epoch": 0.921059673534921, "grad_norm": 28.546939849853516, "learning_rate": 7.894032646507893e-08, "loss": 0.2274, "step": 6884 }, { "epoch": 0.9211934706984212, "grad_norm": 45.16600036621094, "learning_rate": 7.88065293015788e-08, "loss": 0.4121, "step": 6885 }, { "epoch": 0.9213272678619213, "grad_norm": 22.345550537109375, "learning_rate": 7.867273213807867e-08, "loss": 0.111, "step": 6886 }, { "epoch": 0.9214610650254215, "grad_norm": 35.06914138793945, "learning_rate": 7.853893497457853e-08, "loss": 0.2473, "step": 6887 }, { "epoch": 0.9215948621889216, "grad_norm": 31.46977996826172, "learning_rate": 7.84051378110784e-08, "loss": 0.3152, "step": 6888 }, { "epoch": 0.9217286593524218, "grad_norm": 45.92245101928711, "learning_rate": 7.827134064757828e-08, "loss": 0.3514, "step": 6889 }, { "epoch": 0.9218624565159219, "grad_norm": 23.945898056030273, "learning_rate": 7.813754348407814e-08, "loss": 0.1799, "step": 6890 }, { "epoch": 0.921996253679422, "grad_norm": 59.137874603271484, "learning_rate": 7.800374632057801e-08, "loss": 0.2986, "step": 6891 }, { "epoch": 0.9221300508429221, "grad_norm": 82.55567169189453, "learning_rate": 7.786994915707787e-08, "loss": 0.5083, "step": 6892 }, { "epoch": 0.9222638480064222, "grad_norm": 26.545595169067383, "learning_rate": 7.773615199357773e-08, "loss": 0.1725, "step": 6893 }, { "epoch": 0.9223976451699224, "grad_norm": 46.86634063720703, "learning_rate": 7.76023548300776e-08, "loss": 0.4201, "step": 6894 }, { "epoch": 0.9225314423334225, "grad_norm": 24.8587703704834, "learning_rate": 7.746855766657746e-08, "loss": 0.2049, "step": 6895 }, { "epoch": 0.9226652394969227, "grad_norm": 35.149417877197266, "learning_rate": 7.733476050307733e-08, "loss": 0.2153, "step": 6896 }, { "epoch": 0.9227990366604228, "grad_norm": 49.45922088623047, "learning_rate": 7.720096333957719e-08, "loss": 0.3473, "step": 6897 }, { "epoch": 0.9229328338239229, "grad_norm": 39.24463653564453, "learning_rate": 7.706716617607706e-08, "loss": 0.3143, "step": 6898 }, { "epoch": 0.923066630987423, "grad_norm": 56.99187088012695, "learning_rate": 7.693336901257692e-08, "loss": 0.3512, "step": 6899 }, { "epoch": 0.9232004281509232, "grad_norm": 157.88107299804688, "learning_rate": 7.67995718490768e-08, "loss": 0.7564, "step": 6900 }, { "epoch": 0.9233342253144233, "grad_norm": 38.0614013671875, "learning_rate": 7.666577468557667e-08, "loss": 0.227, "step": 6901 }, { "epoch": 0.9234680224779235, "grad_norm": 45.04033660888672, "learning_rate": 7.653197752207654e-08, "loss": 0.4707, "step": 6902 }, { "epoch": 0.9236018196414236, "grad_norm": 56.75408935546875, "learning_rate": 7.63981803585764e-08, "loss": 0.2883, "step": 6903 }, { "epoch": 0.9237356168049238, "grad_norm": 71.94441986083984, "learning_rate": 7.626438319507627e-08, "loss": 0.4265, "step": 6904 }, { "epoch": 0.9238694139684239, "grad_norm": 41.915462493896484, "learning_rate": 7.613058603157613e-08, "loss": 0.326, "step": 6905 }, { "epoch": 0.924003211131924, "grad_norm": 45.83854293823242, "learning_rate": 7.599678886807599e-08, "loss": 0.3178, "step": 6906 }, { "epoch": 0.9241370082954241, "grad_norm": 43.28507614135742, "learning_rate": 7.586299170457585e-08, "loss": 0.2816, "step": 6907 }, { "epoch": 0.9242708054589243, "grad_norm": 22.421289443969727, "learning_rate": 7.572919454107572e-08, "loss": 0.2101, "step": 6908 }, { "epoch": 0.9244046026224244, "grad_norm": 37.0958251953125, "learning_rate": 7.559539737757558e-08, "loss": 0.3969, "step": 6909 }, { "epoch": 0.9245383997859246, "grad_norm": 36.38124084472656, "learning_rate": 7.546160021407545e-08, "loss": 0.301, "step": 6910 }, { "epoch": 0.9246721969494247, "grad_norm": 48.44988250732422, "learning_rate": 7.532780305057533e-08, "loss": 0.426, "step": 6911 }, { "epoch": 0.9248059941129249, "grad_norm": 41.634803771972656, "learning_rate": 7.51940058870752e-08, "loss": 0.5018, "step": 6912 }, { "epoch": 0.9249397912764249, "grad_norm": 45.44672393798828, "learning_rate": 7.506020872357506e-08, "loss": 0.365, "step": 6913 }, { "epoch": 0.925073588439925, "grad_norm": 34.116966247558594, "learning_rate": 7.492641156007493e-08, "loss": 0.1934, "step": 6914 }, { "epoch": 0.9252073856034252, "grad_norm": 43.10159683227539, "learning_rate": 7.47926143965748e-08, "loss": 0.4276, "step": 6915 }, { "epoch": 0.9253411827669253, "grad_norm": 40.407081604003906, "learning_rate": 7.465881723307466e-08, "loss": 0.1463, "step": 6916 }, { "epoch": 0.9254749799304255, "grad_norm": 58.958335876464844, "learning_rate": 7.452502006957453e-08, "loss": 0.4302, "step": 6917 }, { "epoch": 0.9256087770939256, "grad_norm": 16.63587188720703, "learning_rate": 7.439122290607439e-08, "loss": 0.1358, "step": 6918 }, { "epoch": 0.9257425742574258, "grad_norm": 34.05344772338867, "learning_rate": 7.425742574257424e-08, "loss": 0.3495, "step": 6919 }, { "epoch": 0.9258763714209258, "grad_norm": 36.131004333496094, "learning_rate": 7.412362857907411e-08, "loss": 0.2, "step": 6920 }, { "epoch": 0.926010168584426, "grad_norm": 60.78036117553711, "learning_rate": 7.398983141557398e-08, "loss": 0.3422, "step": 6921 }, { "epoch": 0.9261439657479261, "grad_norm": 53.486820220947266, "learning_rate": 7.385603425207386e-08, "loss": 0.3555, "step": 6922 }, { "epoch": 0.9262777629114263, "grad_norm": 43.14543914794922, "learning_rate": 7.372223708857372e-08, "loss": 0.3481, "step": 6923 }, { "epoch": 0.9264115600749264, "grad_norm": 38.128089904785156, "learning_rate": 7.358843992507359e-08, "loss": 0.3264, "step": 6924 }, { "epoch": 0.9265453572384266, "grad_norm": 39.65620040893555, "learning_rate": 7.345464276157345e-08, "loss": 0.3397, "step": 6925 }, { "epoch": 0.9266791544019267, "grad_norm": 35.22130584716797, "learning_rate": 7.332084559807332e-08, "loss": 0.1983, "step": 6926 }, { "epoch": 0.9268129515654269, "grad_norm": 35.33755874633789, "learning_rate": 7.318704843457319e-08, "loss": 0.2312, "step": 6927 }, { "epoch": 0.9269467487289269, "grad_norm": 75.7868881225586, "learning_rate": 7.305325127107305e-08, "loss": 0.6303, "step": 6928 }, { "epoch": 0.927080545892427, "grad_norm": 40.770198822021484, "learning_rate": 7.291945410757292e-08, "loss": 0.3657, "step": 6929 }, { "epoch": 0.9272143430559272, "grad_norm": 53.454612731933594, "learning_rate": 7.278565694407278e-08, "loss": 0.4068, "step": 6930 }, { "epoch": 0.9273481402194274, "grad_norm": 42.157100677490234, "learning_rate": 7.265185978057265e-08, "loss": 0.3392, "step": 6931 }, { "epoch": 0.9274819373829275, "grad_norm": 60.510955810546875, "learning_rate": 7.25180626170725e-08, "loss": 0.3773, "step": 6932 }, { "epoch": 0.9276157345464277, "grad_norm": 42.256927490234375, "learning_rate": 7.238426545357238e-08, "loss": 0.4238, "step": 6933 }, { "epoch": 0.9277495317099278, "grad_norm": 84.77433776855469, "learning_rate": 7.225046829007225e-08, "loss": 0.6357, "step": 6934 }, { "epoch": 0.9278833288734278, "grad_norm": 21.188196182250977, "learning_rate": 7.211667112657211e-08, "loss": 0.1643, "step": 6935 }, { "epoch": 0.928017126036928, "grad_norm": 49.154945373535156, "learning_rate": 7.198287396307198e-08, "loss": 0.3971, "step": 6936 }, { "epoch": 0.9281509232004281, "grad_norm": 32.7579345703125, "learning_rate": 7.184907679957185e-08, "loss": 0.1081, "step": 6937 }, { "epoch": 0.9282847203639283, "grad_norm": 39.576541900634766, "learning_rate": 7.171527963607171e-08, "loss": 0.2329, "step": 6938 }, { "epoch": 0.9284185175274284, "grad_norm": 44.66694641113281, "learning_rate": 7.158148247257158e-08, "loss": 0.1967, "step": 6939 }, { "epoch": 0.9285523146909286, "grad_norm": 36.220741271972656, "learning_rate": 7.144768530907144e-08, "loss": 0.3294, "step": 6940 }, { "epoch": 0.9286861118544287, "grad_norm": 35.99663162231445, "learning_rate": 7.131388814557131e-08, "loss": 0.1893, "step": 6941 }, { "epoch": 0.9288199090179288, "grad_norm": 34.16707992553711, "learning_rate": 7.118009098207118e-08, "loss": 0.2285, "step": 6942 }, { "epoch": 0.9289537061814289, "grad_norm": 64.10503387451172, "learning_rate": 7.104629381857104e-08, "loss": 0.3731, "step": 6943 }, { "epoch": 0.9290875033449291, "grad_norm": 41.25271987915039, "learning_rate": 7.091249665507091e-08, "loss": 0.2896, "step": 6944 }, { "epoch": 0.9292213005084292, "grad_norm": 23.126869201660156, "learning_rate": 7.077869949157079e-08, "loss": 0.1443, "step": 6945 }, { "epoch": 0.9293550976719294, "grad_norm": 38.734493255615234, "learning_rate": 7.064490232807064e-08, "loss": 0.3906, "step": 6946 }, { "epoch": 0.9294888948354295, "grad_norm": 36.26934051513672, "learning_rate": 7.051110516457051e-08, "loss": 0.2648, "step": 6947 }, { "epoch": 0.9296226919989297, "grad_norm": 33.63374328613281, "learning_rate": 7.037730800107037e-08, "loss": 0.2091, "step": 6948 }, { "epoch": 0.9297564891624298, "grad_norm": 42.79764938354492, "learning_rate": 7.024351083757024e-08, "loss": 0.3046, "step": 6949 }, { "epoch": 0.9298902863259298, "grad_norm": 53.07500457763672, "learning_rate": 7.01097136740701e-08, "loss": 0.5302, "step": 6950 }, { "epoch": 0.93002408348943, "grad_norm": 65.73275756835938, "learning_rate": 6.997591651056997e-08, "loss": 0.4055, "step": 6951 }, { "epoch": 0.9301578806529301, "grad_norm": 17.76319122314453, "learning_rate": 6.984211934706984e-08, "loss": 0.1088, "step": 6952 }, { "epoch": 0.9302916778164303, "grad_norm": 52.876976013183594, "learning_rate": 6.97083221835697e-08, "loss": 0.556, "step": 6953 }, { "epoch": 0.9304254749799304, "grad_norm": 53.97183609008789, "learning_rate": 6.957452502006957e-08, "loss": 0.3848, "step": 6954 }, { "epoch": 0.9305592721434306, "grad_norm": 39.722068786621094, "learning_rate": 6.944072785656944e-08, "loss": 0.2669, "step": 6955 }, { "epoch": 0.9306930693069307, "grad_norm": 47.593223571777344, "learning_rate": 6.930693069306931e-08, "loss": 0.3211, "step": 6956 }, { "epoch": 0.9308268664704308, "grad_norm": 39.54585647583008, "learning_rate": 6.917313352956918e-08, "loss": 0.3187, "step": 6957 }, { "epoch": 0.9309606636339309, "grad_norm": 62.56272506713867, "learning_rate": 6.903933636606905e-08, "loss": 0.5046, "step": 6958 }, { "epoch": 0.9310944607974311, "grad_norm": 64.43444061279297, "learning_rate": 6.89055392025689e-08, "loss": 0.2071, "step": 6959 }, { "epoch": 0.9312282579609312, "grad_norm": 31.699975967407227, "learning_rate": 6.877174203906877e-08, "loss": 0.2256, "step": 6960 }, { "epoch": 0.9313620551244314, "grad_norm": 43.34178924560547, "learning_rate": 6.863794487556863e-08, "loss": 0.2464, "step": 6961 }, { "epoch": 0.9314958522879315, "grad_norm": 55.73397445678711, "learning_rate": 6.85041477120685e-08, "loss": 0.3493, "step": 6962 }, { "epoch": 0.9316296494514317, "grad_norm": 38.29203414916992, "learning_rate": 6.837035054856836e-08, "loss": 0.2652, "step": 6963 }, { "epoch": 0.9317634466149317, "grad_norm": 33.94950866699219, "learning_rate": 6.823655338506823e-08, "loss": 0.1745, "step": 6964 }, { "epoch": 0.9318972437784319, "grad_norm": 52.39934158325195, "learning_rate": 6.81027562215681e-08, "loss": 0.336, "step": 6965 }, { "epoch": 0.932031040941932, "grad_norm": 51.04881286621094, "learning_rate": 6.796895905806796e-08, "loss": 0.4161, "step": 6966 }, { "epoch": 0.9321648381054322, "grad_norm": 40.36384582519531, "learning_rate": 6.783516189456784e-08, "loss": 0.1873, "step": 6967 }, { "epoch": 0.9322986352689323, "grad_norm": 91.07634735107422, "learning_rate": 6.770136473106771e-08, "loss": 0.3631, "step": 6968 }, { "epoch": 0.9324324324324325, "grad_norm": 58.83032989501953, "learning_rate": 6.756756756756757e-08, "loss": 0.5154, "step": 6969 }, { "epoch": 0.9325662295959326, "grad_norm": 53.16892623901367, "learning_rate": 6.743377040406744e-08, "loss": 0.3798, "step": 6970 }, { "epoch": 0.9327000267594328, "grad_norm": 44.593475341796875, "learning_rate": 6.72999732405673e-08, "loss": 0.3386, "step": 6971 }, { "epoch": 0.9328338239229328, "grad_norm": 66.19639587402344, "learning_rate": 6.716617607706716e-08, "loss": 0.5249, "step": 6972 }, { "epoch": 0.9329676210864329, "grad_norm": 30.927000045776367, "learning_rate": 6.703237891356702e-08, "loss": 0.304, "step": 6973 }, { "epoch": 0.9331014182499331, "grad_norm": 44.640464782714844, "learning_rate": 6.689858175006689e-08, "loss": 0.2378, "step": 6974 }, { "epoch": 0.9332352154134332, "grad_norm": 40.6358757019043, "learning_rate": 6.676478458656676e-08, "loss": 0.2349, "step": 6975 }, { "epoch": 0.9333690125769334, "grad_norm": 25.456867218017578, "learning_rate": 6.663098742306662e-08, "loss": 0.1923, "step": 6976 }, { "epoch": 0.9335028097404335, "grad_norm": 47.20842742919922, "learning_rate": 6.649719025956649e-08, "loss": 0.3977, "step": 6977 }, { "epoch": 0.9336366069039337, "grad_norm": 44.24525451660156, "learning_rate": 6.636339309606637e-08, "loss": 0.1455, "step": 6978 }, { "epoch": 0.9337704040674337, "grad_norm": 69.01238250732422, "learning_rate": 6.622959593256623e-08, "loss": 0.5161, "step": 6979 }, { "epoch": 0.9339042012309339, "grad_norm": 53.255958557128906, "learning_rate": 6.60957987690661e-08, "loss": 0.2216, "step": 6980 }, { "epoch": 0.934037998394434, "grad_norm": 39.415592193603516, "learning_rate": 6.596200160556597e-08, "loss": 0.2891, "step": 6981 }, { "epoch": 0.9341717955579342, "grad_norm": 42.86985778808594, "learning_rate": 6.582820444206583e-08, "loss": 0.2012, "step": 6982 }, { "epoch": 0.9343055927214343, "grad_norm": 31.42423439025879, "learning_rate": 6.56944072785657e-08, "loss": 0.2596, "step": 6983 }, { "epoch": 0.9344393898849345, "grad_norm": 53.01652145385742, "learning_rate": 6.556061011506555e-08, "loss": 0.2322, "step": 6984 }, { "epoch": 0.9345731870484346, "grad_norm": 46.39634323120117, "learning_rate": 6.542681295156542e-08, "loss": 0.3141, "step": 6985 }, { "epoch": 0.9347069842119347, "grad_norm": 27.290752410888672, "learning_rate": 6.529301578806528e-08, "loss": 0.3022, "step": 6986 }, { "epoch": 0.9348407813754348, "grad_norm": 54.68720626831055, "learning_rate": 6.515921862456515e-08, "loss": 0.4193, "step": 6987 }, { "epoch": 0.934974578538935, "grad_norm": 69.39680480957031, "learning_rate": 6.502542146106501e-08, "loss": 0.4858, "step": 6988 }, { "epoch": 0.9351083757024351, "grad_norm": 37.95942687988281, "learning_rate": 6.48916242975649e-08, "loss": 0.3907, "step": 6989 }, { "epoch": 0.9352421728659353, "grad_norm": 32.18305587768555, "learning_rate": 6.475782713406476e-08, "loss": 0.3457, "step": 6990 }, { "epoch": 0.9353759700294354, "grad_norm": 65.33206176757812, "learning_rate": 6.462402997056463e-08, "loss": 0.4049, "step": 6991 }, { "epoch": 0.9355097671929355, "grad_norm": 69.9959945678711, "learning_rate": 6.449023280706449e-08, "loss": 0.6725, "step": 6992 }, { "epoch": 0.9356435643564357, "grad_norm": 30.832040786743164, "learning_rate": 6.435643564356436e-08, "loss": 0.3455, "step": 6993 }, { "epoch": 0.9357773615199357, "grad_norm": 56.74186325073242, "learning_rate": 6.422263848006422e-08, "loss": 0.384, "step": 6994 }, { "epoch": 0.9359111586834359, "grad_norm": 45.395015716552734, "learning_rate": 6.408884131656409e-08, "loss": 0.3531, "step": 6995 }, { "epoch": 0.936044955846936, "grad_norm": 63.80315017700195, "learning_rate": 6.395504415306396e-08, "loss": 0.6046, "step": 6996 }, { "epoch": 0.9361787530104362, "grad_norm": 64.03328704833984, "learning_rate": 6.382124698956381e-08, "loss": 0.4602, "step": 6997 }, { "epoch": 0.9363125501739363, "grad_norm": 50.93942642211914, "learning_rate": 6.368744982606367e-08, "loss": 0.4027, "step": 6998 }, { "epoch": 0.9364463473374365, "grad_norm": 64.62743377685547, "learning_rate": 6.355365266256354e-08, "loss": 0.3694, "step": 6999 }, { "epoch": 0.9365801445009366, "grad_norm": 37.63440704345703, "learning_rate": 6.341985549906342e-08, "loss": 0.1748, "step": 7000 }, { "epoch": 0.9367139416644367, "grad_norm": 40.15706253051758, "learning_rate": 6.328605833556329e-08, "loss": 0.2347, "step": 7001 }, { "epoch": 0.9368477388279368, "grad_norm": 47.7231559753418, "learning_rate": 6.315226117206315e-08, "loss": 0.3564, "step": 7002 }, { "epoch": 0.936981535991437, "grad_norm": 39.555206298828125, "learning_rate": 6.301846400856302e-08, "loss": 0.2791, "step": 7003 }, { "epoch": 0.9371153331549371, "grad_norm": 40.303550720214844, "learning_rate": 6.288466684506288e-08, "loss": 0.4392, "step": 7004 }, { "epoch": 0.9372491303184373, "grad_norm": 29.65883445739746, "learning_rate": 6.275086968156275e-08, "loss": 0.1464, "step": 7005 }, { "epoch": 0.9373829274819374, "grad_norm": 37.18925476074219, "learning_rate": 6.261707251806262e-08, "loss": 0.2833, "step": 7006 }, { "epoch": 0.9375167246454376, "grad_norm": 34.877418518066406, "learning_rate": 6.248327535456248e-08, "loss": 0.2008, "step": 7007 }, { "epoch": 0.9376505218089376, "grad_norm": 33.52127456665039, "learning_rate": 6.234947819106235e-08, "loss": 0.1773, "step": 7008 }, { "epoch": 0.9377843189724377, "grad_norm": 68.64039611816406, "learning_rate": 6.221568102756221e-08, "loss": 0.4576, "step": 7009 }, { "epoch": 0.9379181161359379, "grad_norm": 43.140411376953125, "learning_rate": 6.208188386406208e-08, "loss": 0.3759, "step": 7010 }, { "epoch": 0.938051913299438, "grad_norm": 39.172447204589844, "learning_rate": 6.194808670056195e-08, "loss": 0.2503, "step": 7011 }, { "epoch": 0.9381857104629382, "grad_norm": 44.03388214111328, "learning_rate": 6.181428953706181e-08, "loss": 0.2436, "step": 7012 }, { "epoch": 0.9383195076264383, "grad_norm": 52.14653015136719, "learning_rate": 6.168049237356168e-08, "loss": 0.5061, "step": 7013 }, { "epoch": 0.9384533047899385, "grad_norm": 57.73456573486328, "learning_rate": 6.154669521006154e-08, "loss": 0.452, "step": 7014 }, { "epoch": 0.9385871019534386, "grad_norm": 71.77189636230469, "learning_rate": 6.141289804656141e-08, "loss": 0.543, "step": 7015 }, { "epoch": 0.9387208991169387, "grad_norm": 36.92636489868164, "learning_rate": 6.127910088306128e-08, "loss": 0.1849, "step": 7016 }, { "epoch": 0.9388546962804388, "grad_norm": 31.64043617248535, "learning_rate": 6.114530371956114e-08, "loss": 0.1852, "step": 7017 }, { "epoch": 0.938988493443939, "grad_norm": 65.56934356689453, "learning_rate": 6.101150655606101e-08, "loss": 0.5069, "step": 7018 }, { "epoch": 0.9391222906074391, "grad_norm": 57.668819427490234, "learning_rate": 6.087770939256087e-08, "loss": 0.4155, "step": 7019 }, { "epoch": 0.9392560877709393, "grad_norm": 46.265174865722656, "learning_rate": 6.074391222906074e-08, "loss": 0.312, "step": 7020 }, { "epoch": 0.9393898849344394, "grad_norm": 23.93571662902832, "learning_rate": 6.06101150655606e-08, "loss": 0.208, "step": 7021 }, { "epoch": 0.9395236820979396, "grad_norm": 65.5263671875, "learning_rate": 6.047631790206047e-08, "loss": 0.5834, "step": 7022 }, { "epoch": 0.9396574792614396, "grad_norm": 29.121822357177734, "learning_rate": 6.034252073856034e-08, "loss": 0.2273, "step": 7023 }, { "epoch": 0.9397912764249398, "grad_norm": 48.82900619506836, "learning_rate": 6.02087235750602e-08, "loss": 0.4002, "step": 7024 }, { "epoch": 0.9399250735884399, "grad_norm": 53.99676513671875, "learning_rate": 6.007492641156007e-08, "loss": 0.6329, "step": 7025 }, { "epoch": 0.9400588707519401, "grad_norm": 46.832088470458984, "learning_rate": 5.994112924805994e-08, "loss": 0.3385, "step": 7026 }, { "epoch": 0.9401926679154402, "grad_norm": 64.61243438720703, "learning_rate": 5.98073320845598e-08, "loss": 0.5631, "step": 7027 }, { "epoch": 0.9403264650789404, "grad_norm": 59.63475036621094, "learning_rate": 5.967353492105967e-08, "loss": 0.2035, "step": 7028 }, { "epoch": 0.9404602622424405, "grad_norm": 46.11490249633789, "learning_rate": 5.9539737757559535e-08, "loss": 0.4333, "step": 7029 }, { "epoch": 0.9405940594059405, "grad_norm": 51.02365493774414, "learning_rate": 5.94059405940594e-08, "loss": 0.2263, "step": 7030 }, { "epoch": 0.9407278565694407, "grad_norm": 40.082359313964844, "learning_rate": 5.9272143430559274e-08, "loss": 0.4786, "step": 7031 }, { "epoch": 0.9408616537329408, "grad_norm": 47.704376220703125, "learning_rate": 5.913834626705914e-08, "loss": 0.3168, "step": 7032 }, { "epoch": 0.940995450896441, "grad_norm": 56.89977264404297, "learning_rate": 5.9004549103559006e-08, "loss": 0.4473, "step": 7033 }, { "epoch": 0.9411292480599411, "grad_norm": 55.5408821105957, "learning_rate": 5.8870751940058865e-08, "loss": 0.3351, "step": 7034 }, { "epoch": 0.9412630452234413, "grad_norm": 49.04518127441406, "learning_rate": 5.873695477655873e-08, "loss": 0.3928, "step": 7035 }, { "epoch": 0.9413968423869414, "grad_norm": 53.850460052490234, "learning_rate": 5.86031576130586e-08, "loss": 0.3603, "step": 7036 }, { "epoch": 0.9415306395504416, "grad_norm": 52.20735168457031, "learning_rate": 5.846936044955847e-08, "loss": 0.3037, "step": 7037 }, { "epoch": 0.9416644367139416, "grad_norm": 42.70010757446289, "learning_rate": 5.8335563286058336e-08, "loss": 0.2289, "step": 7038 }, { "epoch": 0.9417982338774418, "grad_norm": 73.97172546386719, "learning_rate": 5.82017661225582e-08, "loss": 0.6075, "step": 7039 }, { "epoch": 0.9419320310409419, "grad_norm": 58.174373626708984, "learning_rate": 5.806796895905806e-08, "loss": 0.2953, "step": 7040 }, { "epoch": 0.9420658282044421, "grad_norm": 61.943782806396484, "learning_rate": 5.793417179555793e-08, "loss": 0.6227, "step": 7041 }, { "epoch": 0.9421996253679422, "grad_norm": 31.96554946899414, "learning_rate": 5.78003746320578e-08, "loss": 0.1479, "step": 7042 }, { "epoch": 0.9423334225314424, "grad_norm": 46.91334915161133, "learning_rate": 5.7666577468557666e-08, "loss": 0.3999, "step": 7043 }, { "epoch": 0.9424672196949425, "grad_norm": 40.767547607421875, "learning_rate": 5.753278030505753e-08, "loss": 0.2262, "step": 7044 }, { "epoch": 0.9426010168584426, "grad_norm": 68.52933502197266, "learning_rate": 5.73989831415574e-08, "loss": 0.5902, "step": 7045 }, { "epoch": 0.9427348140219427, "grad_norm": 31.4488468170166, "learning_rate": 5.726518597805726e-08, "loss": 0.1601, "step": 7046 }, { "epoch": 0.9428686111854429, "grad_norm": 77.36969757080078, "learning_rate": 5.7131388814557123e-08, "loss": 0.7693, "step": 7047 }, { "epoch": 0.943002408348943, "grad_norm": 48.04059600830078, "learning_rate": 5.6997591651056996e-08, "loss": 0.292, "step": 7048 }, { "epoch": 0.9431362055124431, "grad_norm": 69.22533416748047, "learning_rate": 5.686379448755686e-08, "loss": 0.4497, "step": 7049 }, { "epoch": 0.9432700026759433, "grad_norm": 70.33020782470703, "learning_rate": 5.672999732405673e-08, "loss": 0.453, "step": 7050 }, { "epoch": 0.9434037998394434, "grad_norm": 69.4498062133789, "learning_rate": 5.6596200160556594e-08, "loss": 0.4332, "step": 7051 }, { "epoch": 0.9435375970029435, "grad_norm": 81.94807434082031, "learning_rate": 5.646240299705646e-08, "loss": 0.5966, "step": 7052 }, { "epoch": 0.9436713941664436, "grad_norm": 58.82154846191406, "learning_rate": 5.6328605833556326e-08, "loss": 0.3857, "step": 7053 }, { "epoch": 0.9438051913299438, "grad_norm": 49.372650146484375, "learning_rate": 5.619480867005619e-08, "loss": 0.4299, "step": 7054 }, { "epoch": 0.9439389884934439, "grad_norm": 45.765628814697266, "learning_rate": 5.606101150655606e-08, "loss": 0.3015, "step": 7055 }, { "epoch": 0.9440727856569441, "grad_norm": 54.29952621459961, "learning_rate": 5.5927214343055924e-08, "loss": 0.6129, "step": 7056 }, { "epoch": 0.9442065828204442, "grad_norm": 45.82233810424805, "learning_rate": 5.579341717955579e-08, "loss": 0.2813, "step": 7057 }, { "epoch": 0.9443403799839444, "grad_norm": 42.56020736694336, "learning_rate": 5.5659620016055657e-08, "loss": 0.4025, "step": 7058 }, { "epoch": 0.9444741771474445, "grad_norm": 30.638160705566406, "learning_rate": 5.552582285255553e-08, "loss": 0.2074, "step": 7059 }, { "epoch": 0.9446079743109446, "grad_norm": 57.169925689697266, "learning_rate": 5.539202568905539e-08, "loss": 0.5226, "step": 7060 }, { "epoch": 0.9447417714744447, "grad_norm": 26.887121200561523, "learning_rate": 5.5258228525555255e-08, "loss": 0.2011, "step": 7061 }, { "epoch": 0.9448755686379449, "grad_norm": 45.37367248535156, "learning_rate": 5.512443136205512e-08, "loss": 0.2252, "step": 7062 }, { "epoch": 0.945009365801445, "grad_norm": 42.44330596923828, "learning_rate": 5.4990634198554987e-08, "loss": 0.2646, "step": 7063 }, { "epoch": 0.9451431629649452, "grad_norm": 46.418697357177734, "learning_rate": 5.485683703505485e-08, "loss": 0.3441, "step": 7064 }, { "epoch": 0.9452769601284453, "grad_norm": 28.199954986572266, "learning_rate": 5.4723039871554725e-08, "loss": 0.2182, "step": 7065 }, { "epoch": 0.9454107572919455, "grad_norm": 18.635215759277344, "learning_rate": 5.4589242708054585e-08, "loss": 0.1053, "step": 7066 }, { "epoch": 0.9455445544554455, "grad_norm": 24.35895538330078, "learning_rate": 5.445544554455445e-08, "loss": 0.173, "step": 7067 }, { "epoch": 0.9456783516189456, "grad_norm": 38.43659210205078, "learning_rate": 5.432164838105432e-08, "loss": 0.3209, "step": 7068 }, { "epoch": 0.9458121487824458, "grad_norm": 47.5968017578125, "learning_rate": 5.418785121755418e-08, "loss": 0.2786, "step": 7069 }, { "epoch": 0.9459459459459459, "grad_norm": 51.222286224365234, "learning_rate": 5.4054054054054056e-08, "loss": 0.3811, "step": 7070 }, { "epoch": 0.9460797431094461, "grad_norm": 74.24911499023438, "learning_rate": 5.392025689055392e-08, "loss": 0.4172, "step": 7071 }, { "epoch": 0.9462135402729462, "grad_norm": 34.45348358154297, "learning_rate": 5.378645972705379e-08, "loss": 0.2822, "step": 7072 }, { "epoch": 0.9463473374364464, "grad_norm": 50.986820220947266, "learning_rate": 5.365266256355365e-08, "loss": 0.5711, "step": 7073 }, { "epoch": 0.9464811345999464, "grad_norm": 25.92000389099121, "learning_rate": 5.351886540005351e-08, "loss": 0.1676, "step": 7074 }, { "epoch": 0.9466149317634466, "grad_norm": 59.04735565185547, "learning_rate": 5.338506823655338e-08, "loss": 0.416, "step": 7075 }, { "epoch": 0.9467487289269467, "grad_norm": 60.029335021972656, "learning_rate": 5.325127107305325e-08, "loss": 0.4668, "step": 7076 }, { "epoch": 0.9468825260904469, "grad_norm": 43.996612548828125, "learning_rate": 5.311747390955312e-08, "loss": 0.3397, "step": 7077 }, { "epoch": 0.947016323253947, "grad_norm": 43.25920867919922, "learning_rate": 5.2983676746052984e-08, "loss": 0.4322, "step": 7078 }, { "epoch": 0.9471501204174472, "grad_norm": 40.49636459350586, "learning_rate": 5.284987958255284e-08, "loss": 0.4719, "step": 7079 }, { "epoch": 0.9472839175809473, "grad_norm": 66.8399658203125, "learning_rate": 5.271608241905271e-08, "loss": 0.5435, "step": 7080 }, { "epoch": 0.9474177147444475, "grad_norm": 65.97261810302734, "learning_rate": 5.258228525555258e-08, "loss": 0.4567, "step": 7081 }, { "epoch": 0.9475515119079475, "grad_norm": 61.8963623046875, "learning_rate": 5.244848809205245e-08, "loss": 0.5025, "step": 7082 }, { "epoch": 0.9476853090714477, "grad_norm": 27.25416374206543, "learning_rate": 5.2314690928552314e-08, "loss": 0.1818, "step": 7083 }, { "epoch": 0.9478191062349478, "grad_norm": 57.920162200927734, "learning_rate": 5.218089376505218e-08, "loss": 0.2166, "step": 7084 }, { "epoch": 0.947952903398448, "grad_norm": 58.19135665893555, "learning_rate": 5.2047096601552046e-08, "loss": 0.281, "step": 7085 }, { "epoch": 0.9480867005619481, "grad_norm": 47.36156463623047, "learning_rate": 5.1913299438051905e-08, "loss": 0.4417, "step": 7086 }, { "epoch": 0.9482204977254483, "grad_norm": 63.899715423583984, "learning_rate": 5.177950227455178e-08, "loss": 0.3401, "step": 7087 }, { "epoch": 0.9483542948889484, "grad_norm": 31.957134246826172, "learning_rate": 5.1645705111051644e-08, "loss": 0.2906, "step": 7088 }, { "epoch": 0.9484880920524484, "grad_norm": 59.88203048706055, "learning_rate": 5.151190794755151e-08, "loss": 0.4863, "step": 7089 }, { "epoch": 0.9486218892159486, "grad_norm": 38.15177536010742, "learning_rate": 5.1378110784051376e-08, "loss": 0.4152, "step": 7090 }, { "epoch": 0.9487556863794487, "grad_norm": 54.43124008178711, "learning_rate": 5.124431362055124e-08, "loss": 0.3594, "step": 7091 }, { "epoch": 0.9488894835429489, "grad_norm": 47.51176071166992, "learning_rate": 5.111051645705111e-08, "loss": 0.445, "step": 7092 }, { "epoch": 0.949023280706449, "grad_norm": 42.889122009277344, "learning_rate": 5.0976719293550974e-08, "loss": 0.4661, "step": 7093 }, { "epoch": 0.9491570778699492, "grad_norm": 46.45252227783203, "learning_rate": 5.084292213005084e-08, "loss": 0.4142, "step": 7094 }, { "epoch": 0.9492908750334493, "grad_norm": 64.42156219482422, "learning_rate": 5.0709124966550706e-08, "loss": 0.3946, "step": 7095 }, { "epoch": 0.9494246721969494, "grad_norm": 47.080177307128906, "learning_rate": 5.057532780305057e-08, "loss": 0.3338, "step": 7096 }, { "epoch": 0.9495584693604495, "grad_norm": 25.96709442138672, "learning_rate": 5.044153063955044e-08, "loss": 0.1086, "step": 7097 }, { "epoch": 0.9496922665239497, "grad_norm": 56.25146484375, "learning_rate": 5.030773347605031e-08, "loss": 0.3899, "step": 7098 }, { "epoch": 0.9498260636874498, "grad_norm": 26.16075325012207, "learning_rate": 5.017393631255017e-08, "loss": 0.1964, "step": 7099 }, { "epoch": 0.94995986085095, "grad_norm": 43.26552200317383, "learning_rate": 5.0040139149050037e-08, "loss": 0.3564, "step": 7100 }, { "epoch": 0.9500936580144501, "grad_norm": 39.83720397949219, "learning_rate": 4.99063419855499e-08, "loss": 0.3439, "step": 7101 }, { "epoch": 0.9502274551779503, "grad_norm": 45.252647399902344, "learning_rate": 4.977254482204977e-08, "loss": 0.1739, "step": 7102 }, { "epoch": 0.9503612523414504, "grad_norm": 51.79472732543945, "learning_rate": 4.9638747658549635e-08, "loss": 0.4976, "step": 7103 }, { "epoch": 0.9504950495049505, "grad_norm": 27.523618698120117, "learning_rate": 4.950495049504951e-08, "loss": 0.209, "step": 7104 }, { "epoch": 0.9506288466684506, "grad_norm": 59.8983039855957, "learning_rate": 4.937115333154937e-08, "loss": 0.1718, "step": 7105 }, { "epoch": 0.9507626438319507, "grad_norm": 30.463699340820312, "learning_rate": 4.923735616804923e-08, "loss": 0.283, "step": 7106 }, { "epoch": 0.9508964409954509, "grad_norm": 24.894880294799805, "learning_rate": 4.91035590045491e-08, "loss": 0.212, "step": 7107 }, { "epoch": 0.951030238158951, "grad_norm": 54.341224670410156, "learning_rate": 4.8969761841048965e-08, "loss": 0.3484, "step": 7108 }, { "epoch": 0.9511640353224512, "grad_norm": 48.23430252075195, "learning_rate": 4.883596467754884e-08, "loss": 0.3506, "step": 7109 }, { "epoch": 0.9512978324859513, "grad_norm": 55.9367790222168, "learning_rate": 4.8702167514048704e-08, "loss": 0.4051, "step": 7110 }, { "epoch": 0.9514316296494514, "grad_norm": 45.33068084716797, "learning_rate": 4.856837035054857e-08, "loss": 0.3641, "step": 7111 }, { "epoch": 0.9515654268129515, "grad_norm": 34.328155517578125, "learning_rate": 4.843457318704843e-08, "loss": 0.271, "step": 7112 }, { "epoch": 0.9516992239764517, "grad_norm": 39.73078536987305, "learning_rate": 4.8300776023548295e-08, "loss": 0.4673, "step": 7113 }, { "epoch": 0.9518330211399518, "grad_norm": 39.01171875, "learning_rate": 4.816697886004816e-08, "loss": 0.333, "step": 7114 }, { "epoch": 0.951966818303452, "grad_norm": 44.293617248535156, "learning_rate": 4.8033181696548034e-08, "loss": 0.5853, "step": 7115 }, { "epoch": 0.9521006154669521, "grad_norm": 41.176780700683594, "learning_rate": 4.78993845330479e-08, "loss": 0.4569, "step": 7116 }, { "epoch": 0.9522344126304523, "grad_norm": 49.43171691894531, "learning_rate": 4.7765587369547766e-08, "loss": 0.363, "step": 7117 }, { "epoch": 0.9523682097939523, "grad_norm": 27.538867950439453, "learning_rate": 4.7631790206047625e-08, "loss": 0.1547, "step": 7118 }, { "epoch": 0.9525020069574525, "grad_norm": 37.86224365234375, "learning_rate": 4.749799304254749e-08, "loss": 0.2502, "step": 7119 }, { "epoch": 0.9526358041209526, "grad_norm": 48.41373825073242, "learning_rate": 4.7364195879047364e-08, "loss": 0.3225, "step": 7120 }, { "epoch": 0.9527696012844528, "grad_norm": 60.32798767089844, "learning_rate": 4.723039871554723e-08, "loss": 0.4336, "step": 7121 }, { "epoch": 0.9529033984479529, "grad_norm": 26.59246063232422, "learning_rate": 4.7096601552047096e-08, "loss": 0.1067, "step": 7122 }, { "epoch": 0.9530371956114531, "grad_norm": 48.103919982910156, "learning_rate": 4.696280438854696e-08, "loss": 0.3217, "step": 7123 }, { "epoch": 0.9531709927749532, "grad_norm": 51.966365814208984, "learning_rate": 4.682900722504683e-08, "loss": 0.3733, "step": 7124 }, { "epoch": 0.9533047899384534, "grad_norm": 39.791629791259766, "learning_rate": 4.669521006154669e-08, "loss": 0.3528, "step": 7125 }, { "epoch": 0.9534385871019534, "grad_norm": 35.10197067260742, "learning_rate": 4.656141289804656e-08, "loss": 0.3054, "step": 7126 }, { "epoch": 0.9535723842654535, "grad_norm": 57.77900695800781, "learning_rate": 4.6427615734546426e-08, "loss": 0.3155, "step": 7127 }, { "epoch": 0.9537061814289537, "grad_norm": 31.543628692626953, "learning_rate": 4.629381857104629e-08, "loss": 0.2685, "step": 7128 }, { "epoch": 0.9538399785924538, "grad_norm": 55.76829528808594, "learning_rate": 4.616002140754616e-08, "loss": 0.6023, "step": 7129 }, { "epoch": 0.953973775755954, "grad_norm": 53.51381301879883, "learning_rate": 4.6026224244046024e-08, "loss": 0.3766, "step": 7130 }, { "epoch": 0.9541075729194541, "grad_norm": 56.69764709472656, "learning_rate": 4.58924270805459e-08, "loss": 0.3341, "step": 7131 }, { "epoch": 0.9542413700829543, "grad_norm": 31.63210105895996, "learning_rate": 4.5758629917045756e-08, "loss": 0.2813, "step": 7132 }, { "epoch": 0.9543751672464543, "grad_norm": 30.249645233154297, "learning_rate": 4.562483275354562e-08, "loss": 0.2016, "step": 7133 }, { "epoch": 0.9545089644099545, "grad_norm": 31.07929039001465, "learning_rate": 4.549103559004549e-08, "loss": 0.3282, "step": 7134 }, { "epoch": 0.9546427615734546, "grad_norm": 61.86595153808594, "learning_rate": 4.5357238426545354e-08, "loss": 0.2974, "step": 7135 }, { "epoch": 0.9547765587369548, "grad_norm": 39.839664459228516, "learning_rate": 4.522344126304522e-08, "loss": 0.3747, "step": 7136 }, { "epoch": 0.9549103559004549, "grad_norm": 21.667682647705078, "learning_rate": 4.508964409954509e-08, "loss": 0.1991, "step": 7137 }, { "epoch": 0.9550441530639551, "grad_norm": 46.474639892578125, "learning_rate": 4.495584693604495e-08, "loss": 0.4135, "step": 7138 }, { "epoch": 0.9551779502274552, "grad_norm": 43.3764533996582, "learning_rate": 4.482204977254482e-08, "loss": 0.2312, "step": 7139 }, { "epoch": 0.9553117473909553, "grad_norm": 37.49618911743164, "learning_rate": 4.4688252609044685e-08, "loss": 0.5041, "step": 7140 }, { "epoch": 0.9554455445544554, "grad_norm": 33.723880767822266, "learning_rate": 4.455445544554455e-08, "loss": 0.2239, "step": 7141 }, { "epoch": 0.9555793417179556, "grad_norm": 63.30399703979492, "learning_rate": 4.442065828204442e-08, "loss": 0.3655, "step": 7142 }, { "epoch": 0.9557131388814557, "grad_norm": 80.92396545410156, "learning_rate": 4.428686111854429e-08, "loss": 0.507, "step": 7143 }, { "epoch": 0.9558469360449559, "grad_norm": 65.03535461425781, "learning_rate": 4.4153063955044155e-08, "loss": 0.3421, "step": 7144 }, { "epoch": 0.955980733208456, "grad_norm": 66.23128509521484, "learning_rate": 4.4019266791544015e-08, "loss": 0.538, "step": 7145 }, { "epoch": 0.9561145303719562, "grad_norm": 41.85950469970703, "learning_rate": 4.388546962804388e-08, "loss": 0.3211, "step": 7146 }, { "epoch": 0.9562483275354563, "grad_norm": 54.516502380371094, "learning_rate": 4.375167246454375e-08, "loss": 0.3711, "step": 7147 }, { "epoch": 0.9563821246989563, "grad_norm": 27.624143600463867, "learning_rate": 4.361787530104362e-08, "loss": 0.196, "step": 7148 }, { "epoch": 0.9565159218624565, "grad_norm": 28.334136962890625, "learning_rate": 4.3484078137543486e-08, "loss": 0.2266, "step": 7149 }, { "epoch": 0.9566497190259566, "grad_norm": 59.137271881103516, "learning_rate": 4.335028097404335e-08, "loss": 0.4287, "step": 7150 }, { "epoch": 0.9567835161894568, "grad_norm": 56.554351806640625, "learning_rate": 4.321648381054321e-08, "loss": 0.4976, "step": 7151 }, { "epoch": 0.9569173133529569, "grad_norm": 45.42951965332031, "learning_rate": 4.308268664704308e-08, "loss": 0.4718, "step": 7152 }, { "epoch": 0.9570511105164571, "grad_norm": 47.2115592956543, "learning_rate": 4.294888948354294e-08, "loss": 0.4024, "step": 7153 }, { "epoch": 0.9571849076799572, "grad_norm": 40.08524703979492, "learning_rate": 4.2815092320042816e-08, "loss": 0.3078, "step": 7154 }, { "epoch": 0.9573187048434573, "grad_norm": 26.09086036682129, "learning_rate": 4.268129515654268e-08, "loss": 0.1776, "step": 7155 }, { "epoch": 0.9574525020069574, "grad_norm": 47.63959503173828, "learning_rate": 4.254749799304255e-08, "loss": 0.5593, "step": 7156 }, { "epoch": 0.9575862991704576, "grad_norm": 21.555973052978516, "learning_rate": 4.2413700829542414e-08, "loss": 0.1663, "step": 7157 }, { "epoch": 0.9577200963339577, "grad_norm": 34.26082229614258, "learning_rate": 4.227990366604227e-08, "loss": 0.2718, "step": 7158 }, { "epoch": 0.9578538934974579, "grad_norm": 45.108158111572266, "learning_rate": 4.2146106502542146e-08, "loss": 0.2948, "step": 7159 }, { "epoch": 0.957987690660958, "grad_norm": 52.688072204589844, "learning_rate": 4.201230933904201e-08, "loss": 0.3884, "step": 7160 }, { "epoch": 0.9581214878244582, "grad_norm": 48.619503021240234, "learning_rate": 4.187851217554188e-08, "loss": 0.4888, "step": 7161 }, { "epoch": 0.9582552849879582, "grad_norm": 41.081573486328125, "learning_rate": 4.1744715012041744e-08, "loss": 0.3669, "step": 7162 }, { "epoch": 0.9583890821514583, "grad_norm": 42.436126708984375, "learning_rate": 4.161091784854161e-08, "loss": 0.4357, "step": 7163 }, { "epoch": 0.9585228793149585, "grad_norm": 53.71453094482422, "learning_rate": 4.147712068504147e-08, "loss": 0.253, "step": 7164 }, { "epoch": 0.9586566764784586, "grad_norm": 41.95381546020508, "learning_rate": 4.134332352154134e-08, "loss": 0.2883, "step": 7165 }, { "epoch": 0.9587904736419588, "grad_norm": 62.77477264404297, "learning_rate": 4.120952635804121e-08, "loss": 0.4427, "step": 7166 }, { "epoch": 0.958924270805459, "grad_norm": 58.41498947143555, "learning_rate": 4.1075729194541074e-08, "loss": 0.3071, "step": 7167 }, { "epoch": 0.9590580679689591, "grad_norm": 56.66213607788086, "learning_rate": 4.094193203104094e-08, "loss": 0.2681, "step": 7168 }, { "epoch": 0.9591918651324592, "grad_norm": 37.08858871459961, "learning_rate": 4.0808134867540806e-08, "loss": 0.4956, "step": 7169 }, { "epoch": 0.9593256622959593, "grad_norm": 50.60580062866211, "learning_rate": 4.067433770404067e-08, "loss": 0.3114, "step": 7170 }, { "epoch": 0.9594594594594594, "grad_norm": 51.02156448364258, "learning_rate": 4.054054054054054e-08, "loss": 0.2915, "step": 7171 }, { "epoch": 0.9595932566229596, "grad_norm": 43.81181335449219, "learning_rate": 4.0406743377040404e-08, "loss": 0.2926, "step": 7172 }, { "epoch": 0.9597270537864597, "grad_norm": 48.27928924560547, "learning_rate": 4.027294621354027e-08, "loss": 0.3782, "step": 7173 }, { "epoch": 0.9598608509499599, "grad_norm": 44.83047103881836, "learning_rate": 4.0139149050040136e-08, "loss": 0.3276, "step": 7174 }, { "epoch": 0.95999464811346, "grad_norm": 34.002220153808594, "learning_rate": 4.000535188654e-08, "loss": 0.3681, "step": 7175 }, { "epoch": 0.9601284452769602, "grad_norm": 51.46653366088867, "learning_rate": 3.9871554723039875e-08, "loss": 0.3563, "step": 7176 }, { "epoch": 0.9602622424404602, "grad_norm": 31.037372589111328, "learning_rate": 3.9737757559539735e-08, "loss": 0.2971, "step": 7177 }, { "epoch": 0.9603960396039604, "grad_norm": 43.24900817871094, "learning_rate": 3.96039603960396e-08, "loss": 0.4345, "step": 7178 }, { "epoch": 0.9605298367674605, "grad_norm": 44.00263595581055, "learning_rate": 3.9470163232539467e-08, "loss": 0.2496, "step": 7179 }, { "epoch": 0.9606636339309607, "grad_norm": 69.944091796875, "learning_rate": 3.933636606903933e-08, "loss": 0.5389, "step": 7180 }, { "epoch": 0.9607974310944608, "grad_norm": 60.09908676147461, "learning_rate": 3.92025689055392e-08, "loss": 0.4477, "step": 7181 }, { "epoch": 0.960931228257961, "grad_norm": 47.39865493774414, "learning_rate": 3.906877174203907e-08, "loss": 0.247, "step": 7182 }, { "epoch": 0.9610650254214611, "grad_norm": 40.92993927001953, "learning_rate": 3.893497457853894e-08, "loss": 0.2835, "step": 7183 }, { "epoch": 0.9611988225849611, "grad_norm": 58.65975570678711, "learning_rate": 3.88011774150388e-08, "loss": 0.2902, "step": 7184 }, { "epoch": 0.9613326197484613, "grad_norm": 45.9123649597168, "learning_rate": 3.866738025153866e-08, "loss": 0.3674, "step": 7185 }, { "epoch": 0.9614664169119614, "grad_norm": 35.929203033447266, "learning_rate": 3.853358308803853e-08, "loss": 0.2615, "step": 7186 }, { "epoch": 0.9616002140754616, "grad_norm": 55.535552978515625, "learning_rate": 3.83997859245384e-08, "loss": 0.4834, "step": 7187 }, { "epoch": 0.9617340112389617, "grad_norm": 70.51167297363281, "learning_rate": 3.826598876103827e-08, "loss": 0.4652, "step": 7188 }, { "epoch": 0.9618678084024619, "grad_norm": 48.52618408203125, "learning_rate": 3.8132191597538134e-08, "loss": 0.4962, "step": 7189 }, { "epoch": 0.962001605565962, "grad_norm": 26.18316650390625, "learning_rate": 3.799839443403799e-08, "loss": 0.2514, "step": 7190 }, { "epoch": 0.9621354027294622, "grad_norm": 33.93279266357422, "learning_rate": 3.786459727053786e-08, "loss": 0.2315, "step": 7191 }, { "epoch": 0.9622691998929622, "grad_norm": 64.45465087890625, "learning_rate": 3.7730800107037725e-08, "loss": 0.488, "step": 7192 }, { "epoch": 0.9624029970564624, "grad_norm": 32.78951644897461, "learning_rate": 3.75970029435376e-08, "loss": 0.3455, "step": 7193 }, { "epoch": 0.9625367942199625, "grad_norm": 40.31325912475586, "learning_rate": 3.7463205780037464e-08, "loss": 0.3107, "step": 7194 }, { "epoch": 0.9626705913834627, "grad_norm": 67.2489013671875, "learning_rate": 3.732940861653733e-08, "loss": 0.4901, "step": 7195 }, { "epoch": 0.9628043885469628, "grad_norm": 42.16135025024414, "learning_rate": 3.7195611453037196e-08, "loss": 0.381, "step": 7196 }, { "epoch": 0.962938185710463, "grad_norm": 40.37718200683594, "learning_rate": 3.7061814289537055e-08, "loss": 0.2677, "step": 7197 }, { "epoch": 0.9630719828739631, "grad_norm": 72.6696548461914, "learning_rate": 3.692801712603693e-08, "loss": 0.5954, "step": 7198 }, { "epoch": 0.9632057800374632, "grad_norm": 49.14225387573242, "learning_rate": 3.6794219962536794e-08, "loss": 0.3503, "step": 7199 }, { "epoch": 0.9633395772009633, "grad_norm": 61.938602447509766, "learning_rate": 3.666042279903666e-08, "loss": 0.4172, "step": 7200 }, { "epoch": 0.9634733743644635, "grad_norm": 44.49369812011719, "learning_rate": 3.6526625635536526e-08, "loss": 0.2901, "step": 7201 }, { "epoch": 0.9636071715279636, "grad_norm": 44.571659088134766, "learning_rate": 3.639282847203639e-08, "loss": 0.3981, "step": 7202 }, { "epoch": 0.9637409686914638, "grad_norm": 53.549503326416016, "learning_rate": 3.625903130853625e-08, "loss": 0.5374, "step": 7203 }, { "epoch": 0.9638747658549639, "grad_norm": 74.08162689208984, "learning_rate": 3.6125234145036124e-08, "loss": 0.6493, "step": 7204 }, { "epoch": 0.964008563018464, "grad_norm": 37.59739685058594, "learning_rate": 3.599143698153599e-08, "loss": 0.1961, "step": 7205 }, { "epoch": 0.9641423601819642, "grad_norm": 44.14349365234375, "learning_rate": 3.5857639818035856e-08, "loss": 0.3687, "step": 7206 }, { "epoch": 0.9642761573454642, "grad_norm": 34.83787155151367, "learning_rate": 3.572384265453572e-08, "loss": 0.3663, "step": 7207 }, { "epoch": 0.9644099545089644, "grad_norm": 57.84967041015625, "learning_rate": 3.559004549103559e-08, "loss": 0.5316, "step": 7208 }, { "epoch": 0.9645437516724645, "grad_norm": 46.5767822265625, "learning_rate": 3.5456248327535454e-08, "loss": 0.3865, "step": 7209 }, { "epoch": 0.9646775488359647, "grad_norm": 37.96615219116211, "learning_rate": 3.532245116403532e-08, "loss": 0.3596, "step": 7210 }, { "epoch": 0.9648113459994648, "grad_norm": 35.791839599609375, "learning_rate": 3.5188654000535186e-08, "loss": 0.3163, "step": 7211 }, { "epoch": 0.964945143162965, "grad_norm": 37.39236068725586, "learning_rate": 3.505485683703505e-08, "loss": 0.3009, "step": 7212 }, { "epoch": 0.9650789403264651, "grad_norm": 44.124996185302734, "learning_rate": 3.492105967353492e-08, "loss": 0.4501, "step": 7213 }, { "epoch": 0.9652127374899652, "grad_norm": 27.223859786987305, "learning_rate": 3.4787262510034784e-08, "loss": 0.2078, "step": 7214 }, { "epoch": 0.9653465346534653, "grad_norm": 25.52492904663086, "learning_rate": 3.465346534653466e-08, "loss": 0.1472, "step": 7215 }, { "epoch": 0.9654803318169655, "grad_norm": 18.22911262512207, "learning_rate": 3.451966818303452e-08, "loss": 0.1454, "step": 7216 }, { "epoch": 0.9656141289804656, "grad_norm": 33.517879486083984, "learning_rate": 3.438587101953438e-08, "loss": 0.2536, "step": 7217 }, { "epoch": 0.9657479261439658, "grad_norm": 32.96928405761719, "learning_rate": 3.425207385603425e-08, "loss": 0.2217, "step": 7218 }, { "epoch": 0.9658817233074659, "grad_norm": 53.212135314941406, "learning_rate": 3.4118276692534115e-08, "loss": 0.4595, "step": 7219 }, { "epoch": 0.9660155204709661, "grad_norm": 49.953773498535156, "learning_rate": 3.398447952903398e-08, "loss": 0.336, "step": 7220 }, { "epoch": 0.9661493176344661, "grad_norm": 35.77394485473633, "learning_rate": 3.3850682365533853e-08, "loss": 0.2468, "step": 7221 }, { "epoch": 0.9662831147979662, "grad_norm": 43.1209602355957, "learning_rate": 3.371688520203372e-08, "loss": 0.253, "step": 7222 }, { "epoch": 0.9664169119614664, "grad_norm": 52.48933410644531, "learning_rate": 3.358308803853358e-08, "loss": 0.3872, "step": 7223 }, { "epoch": 0.9665507091249665, "grad_norm": 63.2009162902832, "learning_rate": 3.3449290875033445e-08, "loss": 0.4706, "step": 7224 }, { "epoch": 0.9666845062884667, "grad_norm": 48.85515594482422, "learning_rate": 3.331549371153331e-08, "loss": 0.3999, "step": 7225 }, { "epoch": 0.9668183034519668, "grad_norm": 49.72726821899414, "learning_rate": 3.3181696548033184e-08, "loss": 0.5051, "step": 7226 }, { "epoch": 0.966952100615467, "grad_norm": 48.635597229003906, "learning_rate": 3.304789938453305e-08, "loss": 0.601, "step": 7227 }, { "epoch": 0.9670858977789671, "grad_norm": 36.095733642578125, "learning_rate": 3.2914102221032916e-08, "loss": 0.3682, "step": 7228 }, { "epoch": 0.9672196949424672, "grad_norm": 64.23587799072266, "learning_rate": 3.2780305057532775e-08, "loss": 0.4596, "step": 7229 }, { "epoch": 0.9673534921059673, "grad_norm": 41.01141357421875, "learning_rate": 3.264650789403264e-08, "loss": 0.2917, "step": 7230 }, { "epoch": 0.9674872892694675, "grad_norm": 46.29179000854492, "learning_rate": 3.251271073053251e-08, "loss": 0.3662, "step": 7231 }, { "epoch": 0.9676210864329676, "grad_norm": 61.27012252807617, "learning_rate": 3.237891356703238e-08, "loss": 0.5306, "step": 7232 }, { "epoch": 0.9677548835964678, "grad_norm": 67.93814086914062, "learning_rate": 3.2245116403532246e-08, "loss": 0.4971, "step": 7233 }, { "epoch": 0.9678886807599679, "grad_norm": 53.75419235229492, "learning_rate": 3.211131924003211e-08, "loss": 0.4866, "step": 7234 }, { "epoch": 0.9680224779234681, "grad_norm": 26.986148834228516, "learning_rate": 3.197752207653198e-08, "loss": 0.1671, "step": 7235 }, { "epoch": 0.9681562750869681, "grad_norm": 59.23550033569336, "learning_rate": 3.184372491303184e-08, "loss": 0.3617, "step": 7236 }, { "epoch": 0.9682900722504683, "grad_norm": 84.9261245727539, "learning_rate": 3.170992774953171e-08, "loss": 0.554, "step": 7237 }, { "epoch": 0.9684238694139684, "grad_norm": 25.04006004333496, "learning_rate": 3.1576130586031576e-08, "loss": 0.1749, "step": 7238 }, { "epoch": 0.9685576665774686, "grad_norm": 33.81444549560547, "learning_rate": 3.144233342253144e-08, "loss": 0.1627, "step": 7239 }, { "epoch": 0.9686914637409687, "grad_norm": 35.59192657470703, "learning_rate": 3.130853625903131e-08, "loss": 0.2359, "step": 7240 }, { "epoch": 0.9688252609044689, "grad_norm": 74.52083587646484, "learning_rate": 3.1174739095531174e-08, "loss": 0.3273, "step": 7241 }, { "epoch": 0.968959058067969, "grad_norm": 64.943603515625, "learning_rate": 3.104094193203104e-08, "loss": 0.3193, "step": 7242 }, { "epoch": 0.969092855231469, "grad_norm": 62.517398834228516, "learning_rate": 3.0907144768530906e-08, "loss": 0.4207, "step": 7243 }, { "epoch": 0.9692266523949692, "grad_norm": 58.86537551879883, "learning_rate": 3.077334760503077e-08, "loss": 0.5404, "step": 7244 }, { "epoch": 0.9693604495584693, "grad_norm": 32.31926345825195, "learning_rate": 3.063955044153064e-08, "loss": 0.2606, "step": 7245 }, { "epoch": 0.9694942467219695, "grad_norm": 40.17202377319336, "learning_rate": 3.0505753278030504e-08, "loss": 0.287, "step": 7246 }, { "epoch": 0.9696280438854696, "grad_norm": 53.25642776489258, "learning_rate": 3.037195611453037e-08, "loss": 0.4211, "step": 7247 }, { "epoch": 0.9697618410489698, "grad_norm": 48.03364562988281, "learning_rate": 3.0238158951030236e-08, "loss": 0.5136, "step": 7248 }, { "epoch": 0.9698956382124699, "grad_norm": 38.34318161010742, "learning_rate": 3.01043617875301e-08, "loss": 0.3612, "step": 7249 }, { "epoch": 0.9700294353759701, "grad_norm": 50.60173416137695, "learning_rate": 2.997056462402997e-08, "loss": 0.2749, "step": 7250 }, { "epoch": 0.9701632325394701, "grad_norm": 33.33936309814453, "learning_rate": 2.9836767460529834e-08, "loss": 0.2046, "step": 7251 }, { "epoch": 0.9702970297029703, "grad_norm": 45.062347412109375, "learning_rate": 2.97029702970297e-08, "loss": 0.577, "step": 7252 }, { "epoch": 0.9704308268664704, "grad_norm": 44.337032318115234, "learning_rate": 2.956917313352957e-08, "loss": 0.358, "step": 7253 }, { "epoch": 0.9705646240299706, "grad_norm": 33.11764144897461, "learning_rate": 2.9435375970029433e-08, "loss": 0.3174, "step": 7254 }, { "epoch": 0.9706984211934707, "grad_norm": 57.8846435546875, "learning_rate": 2.93015788065293e-08, "loss": 0.3384, "step": 7255 }, { "epoch": 0.9708322183569709, "grad_norm": 20.6994686126709, "learning_rate": 2.9167781643029168e-08, "loss": 0.2434, "step": 7256 }, { "epoch": 0.970966015520471, "grad_norm": 21.61481285095215, "learning_rate": 2.903398447952903e-08, "loss": 0.1261, "step": 7257 }, { "epoch": 0.971099812683971, "grad_norm": 56.51685333251953, "learning_rate": 2.89001873160289e-08, "loss": 0.3415, "step": 7258 }, { "epoch": 0.9712336098474712, "grad_norm": 35.979007720947266, "learning_rate": 2.8766390152528766e-08, "loss": 0.1859, "step": 7259 }, { "epoch": 0.9713674070109714, "grad_norm": 29.388277053833008, "learning_rate": 2.863259298902863e-08, "loss": 0.2196, "step": 7260 }, { "epoch": 0.9715012041744715, "grad_norm": 27.950496673583984, "learning_rate": 2.8498795825528498e-08, "loss": 0.2278, "step": 7261 }, { "epoch": 0.9716350013379716, "grad_norm": 47.09110641479492, "learning_rate": 2.8364998662028364e-08, "loss": 0.4357, "step": 7262 }, { "epoch": 0.9717687985014718, "grad_norm": 52.33483123779297, "learning_rate": 2.823120149852823e-08, "loss": 0.4274, "step": 7263 }, { "epoch": 0.971902595664972, "grad_norm": 42.13747787475586, "learning_rate": 2.8097404335028096e-08, "loss": 0.2204, "step": 7264 }, { "epoch": 0.972036392828472, "grad_norm": 40.78249740600586, "learning_rate": 2.7963607171527962e-08, "loss": 0.2338, "step": 7265 }, { "epoch": 0.9721701899919721, "grad_norm": 40.73964309692383, "learning_rate": 2.7829810008027828e-08, "loss": 0.3324, "step": 7266 }, { "epoch": 0.9723039871554723, "grad_norm": 47.384742736816406, "learning_rate": 2.7696012844527694e-08, "loss": 0.3542, "step": 7267 }, { "epoch": 0.9724377843189724, "grad_norm": 30.940719604492188, "learning_rate": 2.756221568102756e-08, "loss": 0.3653, "step": 7268 }, { "epoch": 0.9725715814824726, "grad_norm": 66.4526138305664, "learning_rate": 2.7428418517527426e-08, "loss": 0.5996, "step": 7269 }, { "epoch": 0.9727053786459727, "grad_norm": 71.33191680908203, "learning_rate": 2.7294621354027292e-08, "loss": 0.4866, "step": 7270 }, { "epoch": 0.9728391758094729, "grad_norm": 41.60784912109375, "learning_rate": 2.716082419052716e-08, "loss": 0.3836, "step": 7271 }, { "epoch": 0.972972972972973, "grad_norm": 44.52705764770508, "learning_rate": 2.7027027027027028e-08, "loss": 0.2474, "step": 7272 }, { "epoch": 0.9731067701364731, "grad_norm": 34.82537078857422, "learning_rate": 2.6893229863526894e-08, "loss": 0.2221, "step": 7273 }, { "epoch": 0.9732405672999732, "grad_norm": 35.09381103515625, "learning_rate": 2.6759432700026757e-08, "loss": 0.1496, "step": 7274 }, { "epoch": 0.9733743644634734, "grad_norm": 39.89849853515625, "learning_rate": 2.6625635536526626e-08, "loss": 0.3376, "step": 7275 }, { "epoch": 0.9735081616269735, "grad_norm": 82.34014129638672, "learning_rate": 2.6491838373026492e-08, "loss": 0.4172, "step": 7276 }, { "epoch": 0.9736419587904737, "grad_norm": 47.06584167480469, "learning_rate": 2.6358041209526355e-08, "loss": 0.3218, "step": 7277 }, { "epoch": 0.9737757559539738, "grad_norm": 50.59303665161133, "learning_rate": 2.6224244046026224e-08, "loss": 0.4992, "step": 7278 }, { "epoch": 0.973909553117474, "grad_norm": 31.13294792175293, "learning_rate": 2.609044688252609e-08, "loss": 0.1561, "step": 7279 }, { "epoch": 0.974043350280974, "grad_norm": 41.116573333740234, "learning_rate": 2.5956649719025953e-08, "loss": 0.3279, "step": 7280 }, { "epoch": 0.9741771474444741, "grad_norm": 45.80060577392578, "learning_rate": 2.5822852555525822e-08, "loss": 0.3398, "step": 7281 }, { "epoch": 0.9743109446079743, "grad_norm": 43.42937088012695, "learning_rate": 2.5689055392025688e-08, "loss": 0.3259, "step": 7282 }, { "epoch": 0.9744447417714744, "grad_norm": 62.22471237182617, "learning_rate": 2.5555258228525554e-08, "loss": 0.5307, "step": 7283 }, { "epoch": 0.9745785389349746, "grad_norm": 34.14992141723633, "learning_rate": 2.542146106502542e-08, "loss": 0.2851, "step": 7284 }, { "epoch": 0.9747123360984747, "grad_norm": 56.72453308105469, "learning_rate": 2.5287663901525286e-08, "loss": 0.3986, "step": 7285 }, { "epoch": 0.9748461332619749, "grad_norm": 47.577117919921875, "learning_rate": 2.5153866738025156e-08, "loss": 0.2236, "step": 7286 }, { "epoch": 0.9749799304254749, "grad_norm": 49.966705322265625, "learning_rate": 2.5020069574525018e-08, "loss": 0.246, "step": 7287 }, { "epoch": 0.9751137275889751, "grad_norm": 58.9353141784668, "learning_rate": 2.4886272411024884e-08, "loss": 0.4547, "step": 7288 }, { "epoch": 0.9752475247524752, "grad_norm": 70.85787963867188, "learning_rate": 2.4752475247524754e-08, "loss": 0.5032, "step": 7289 }, { "epoch": 0.9753813219159754, "grad_norm": 38.07657241821289, "learning_rate": 2.4618678084024616e-08, "loss": 0.3084, "step": 7290 }, { "epoch": 0.9755151190794755, "grad_norm": 26.453969955444336, "learning_rate": 2.4484880920524482e-08, "loss": 0.2615, "step": 7291 }, { "epoch": 0.9756489162429757, "grad_norm": 41.59080505371094, "learning_rate": 2.4351083757024352e-08, "loss": 0.3213, "step": 7292 }, { "epoch": 0.9757827134064758, "grad_norm": 44.33418273925781, "learning_rate": 2.4217286593524215e-08, "loss": 0.2322, "step": 7293 }, { "epoch": 0.975916510569976, "grad_norm": 55.08329772949219, "learning_rate": 2.408348943002408e-08, "loss": 0.5035, "step": 7294 }, { "epoch": 0.976050307733476, "grad_norm": 33.93308639526367, "learning_rate": 2.394969226652395e-08, "loss": 0.3111, "step": 7295 }, { "epoch": 0.9761841048969762, "grad_norm": 37.5789794921875, "learning_rate": 2.3815895103023813e-08, "loss": 0.222, "step": 7296 }, { "epoch": 0.9763179020604763, "grad_norm": 51.99820327758789, "learning_rate": 2.3682097939523682e-08, "loss": 0.3976, "step": 7297 }, { "epoch": 0.9764516992239765, "grad_norm": 36.866756439208984, "learning_rate": 2.3548300776023548e-08, "loss": 0.2454, "step": 7298 }, { "epoch": 0.9765854963874766, "grad_norm": 29.868574142456055, "learning_rate": 2.3414503612523414e-08, "loss": 0.2329, "step": 7299 }, { "epoch": 0.9767192935509768, "grad_norm": 47.31033706665039, "learning_rate": 2.328070644902328e-08, "loss": 0.4259, "step": 7300 }, { "epoch": 0.9768530907144769, "grad_norm": 45.005706787109375, "learning_rate": 2.3146909285523146e-08, "loss": 0.3036, "step": 7301 }, { "epoch": 0.9769868878779769, "grad_norm": 72.46842193603516, "learning_rate": 2.3013112122023012e-08, "loss": 0.5568, "step": 7302 }, { "epoch": 0.9771206850414771, "grad_norm": 42.19746780395508, "learning_rate": 2.2879314958522878e-08, "loss": 0.2845, "step": 7303 }, { "epoch": 0.9772544822049772, "grad_norm": 64.98949432373047, "learning_rate": 2.2745517795022744e-08, "loss": 0.3727, "step": 7304 }, { "epoch": 0.9773882793684774, "grad_norm": 76.5416259765625, "learning_rate": 2.261172063152261e-08, "loss": 0.3679, "step": 7305 }, { "epoch": 0.9775220765319775, "grad_norm": 38.77809143066406, "learning_rate": 2.2477923468022476e-08, "loss": 0.2318, "step": 7306 }, { "epoch": 0.9776558736954777, "grad_norm": 38.90217971801758, "learning_rate": 2.2344126304522342e-08, "loss": 0.2487, "step": 7307 }, { "epoch": 0.9777896708589778, "grad_norm": 35.78802490234375, "learning_rate": 2.221032914102221e-08, "loss": 0.2536, "step": 7308 }, { "epoch": 0.9779234680224779, "grad_norm": 37.79302978515625, "learning_rate": 2.2076531977522078e-08, "loss": 0.2412, "step": 7309 }, { "epoch": 0.978057265185978, "grad_norm": 51.76309585571289, "learning_rate": 2.194273481402194e-08, "loss": 0.3676, "step": 7310 }, { "epoch": 0.9781910623494782, "grad_norm": 45.113067626953125, "learning_rate": 2.180893765052181e-08, "loss": 0.2337, "step": 7311 }, { "epoch": 0.9783248595129783, "grad_norm": 34.66709518432617, "learning_rate": 2.1675140487021676e-08, "loss": 0.3348, "step": 7312 }, { "epoch": 0.9784586566764785, "grad_norm": 61.89415740966797, "learning_rate": 2.154134332352154e-08, "loss": 0.7115, "step": 7313 }, { "epoch": 0.9785924538399786, "grad_norm": 50.17863464355469, "learning_rate": 2.1407546160021408e-08, "loss": 0.3988, "step": 7314 }, { "epoch": 0.9787262510034788, "grad_norm": 38.222145080566406, "learning_rate": 2.1273748996521274e-08, "loss": 0.3135, "step": 7315 }, { "epoch": 0.9788600481669789, "grad_norm": 49.915565490722656, "learning_rate": 2.1139951833021137e-08, "loss": 0.335, "step": 7316 }, { "epoch": 0.978993845330479, "grad_norm": 41.13098907470703, "learning_rate": 2.1006154669521006e-08, "loss": 0.2946, "step": 7317 }, { "epoch": 0.9791276424939791, "grad_norm": 47.49373245239258, "learning_rate": 2.0872357506020872e-08, "loss": 0.191, "step": 7318 }, { "epoch": 0.9792614396574792, "grad_norm": 46.120548248291016, "learning_rate": 2.0738560342520735e-08, "loss": 0.3212, "step": 7319 }, { "epoch": 0.9793952368209794, "grad_norm": 42.64936447143555, "learning_rate": 2.0604763179020604e-08, "loss": 0.2578, "step": 7320 }, { "epoch": 0.9795290339844795, "grad_norm": 56.12272644042969, "learning_rate": 2.047096601552047e-08, "loss": 0.3674, "step": 7321 }, { "epoch": 0.9796628311479797, "grad_norm": 35.811058044433594, "learning_rate": 2.0337168852020336e-08, "loss": 0.389, "step": 7322 }, { "epoch": 0.9797966283114798, "grad_norm": 28.882612228393555, "learning_rate": 2.0203371688520202e-08, "loss": 0.207, "step": 7323 }, { "epoch": 0.9799304254749799, "grad_norm": 66.97712707519531, "learning_rate": 2.0069574525020068e-08, "loss": 0.5792, "step": 7324 }, { "epoch": 0.98006422263848, "grad_norm": 38.12857437133789, "learning_rate": 1.9935777361519938e-08, "loss": 0.4032, "step": 7325 }, { "epoch": 0.9801980198019802, "grad_norm": 50.153282165527344, "learning_rate": 1.98019801980198e-08, "loss": 0.4688, "step": 7326 }, { "epoch": 0.9803318169654803, "grad_norm": 38.876060485839844, "learning_rate": 1.9668183034519666e-08, "loss": 0.304, "step": 7327 }, { "epoch": 0.9804656141289805, "grad_norm": 104.2895736694336, "learning_rate": 1.9534385871019536e-08, "loss": 0.3485, "step": 7328 }, { "epoch": 0.9805994112924806, "grad_norm": 45.12457275390625, "learning_rate": 1.94005887075194e-08, "loss": 0.2762, "step": 7329 }, { "epoch": 0.9807332084559808, "grad_norm": 33.50376892089844, "learning_rate": 1.9266791544019264e-08, "loss": 0.2527, "step": 7330 }, { "epoch": 0.9808670056194808, "grad_norm": 29.88574981689453, "learning_rate": 1.9132994380519134e-08, "loss": 0.1969, "step": 7331 }, { "epoch": 0.981000802782981, "grad_norm": 42.12179946899414, "learning_rate": 1.8999197217018997e-08, "loss": 0.3151, "step": 7332 }, { "epoch": 0.9811345999464811, "grad_norm": 35.96826171875, "learning_rate": 1.8865400053518863e-08, "loss": 0.3338, "step": 7333 }, { "epoch": 0.9812683971099813, "grad_norm": 26.459869384765625, "learning_rate": 1.8731602890018732e-08, "loss": 0.1241, "step": 7334 }, { "epoch": 0.9814021942734814, "grad_norm": 61.092445373535156, "learning_rate": 1.8597805726518598e-08, "loss": 0.3597, "step": 7335 }, { "epoch": 0.9815359914369816, "grad_norm": 64.59573364257812, "learning_rate": 1.8464008563018464e-08, "loss": 0.487, "step": 7336 }, { "epoch": 0.9816697886004817, "grad_norm": 56.55144500732422, "learning_rate": 1.833021139951833e-08, "loss": 0.2617, "step": 7337 }, { "epoch": 0.9818035857639819, "grad_norm": 42.32561492919922, "learning_rate": 1.8196414236018196e-08, "loss": 0.2736, "step": 7338 }, { "epoch": 0.9819373829274819, "grad_norm": 42.058692932128906, "learning_rate": 1.8062617072518062e-08, "loss": 0.3146, "step": 7339 }, { "epoch": 0.982071180090982, "grad_norm": 41.72700500488281, "learning_rate": 1.7928819909017928e-08, "loss": 0.3864, "step": 7340 }, { "epoch": 0.9822049772544822, "grad_norm": 51.34474182128906, "learning_rate": 1.7795022745517794e-08, "loss": 0.3203, "step": 7341 }, { "epoch": 0.9823387744179823, "grad_norm": 45.54649353027344, "learning_rate": 1.766122558201766e-08, "loss": 0.4514, "step": 7342 }, { "epoch": 0.9824725715814825, "grad_norm": 50.14117431640625, "learning_rate": 1.7527428418517526e-08, "loss": 0.405, "step": 7343 }, { "epoch": 0.9826063687449826, "grad_norm": 74.18040466308594, "learning_rate": 1.7393631255017392e-08, "loss": 0.3866, "step": 7344 }, { "epoch": 0.9827401659084828, "grad_norm": 39.97657775878906, "learning_rate": 1.725983409151726e-08, "loss": 0.2877, "step": 7345 }, { "epoch": 0.9828739630719828, "grad_norm": 49.55459213256836, "learning_rate": 1.7126036928017124e-08, "loss": 0.3076, "step": 7346 }, { "epoch": 0.983007760235483, "grad_norm": 23.286975860595703, "learning_rate": 1.699223976451699e-08, "loss": 0.0841, "step": 7347 }, { "epoch": 0.9831415573989831, "grad_norm": 42.37560272216797, "learning_rate": 1.685844260101686e-08, "loss": 0.347, "step": 7348 }, { "epoch": 0.9832753545624833, "grad_norm": 56.112606048583984, "learning_rate": 1.6724645437516722e-08, "loss": 0.2181, "step": 7349 }, { "epoch": 0.9834091517259834, "grad_norm": 54.9824333190918, "learning_rate": 1.6590848274016592e-08, "loss": 0.5056, "step": 7350 }, { "epoch": 0.9835429488894836, "grad_norm": 46.233585357666016, "learning_rate": 1.6457051110516458e-08, "loss": 0.2663, "step": 7351 }, { "epoch": 0.9836767460529837, "grad_norm": 26.80943489074707, "learning_rate": 1.632325394701632e-08, "loss": 0.2081, "step": 7352 }, { "epoch": 0.9838105432164838, "grad_norm": 56.77552795410156, "learning_rate": 1.618945678351619e-08, "loss": 0.5511, "step": 7353 }, { "epoch": 0.9839443403799839, "grad_norm": 72.41248321533203, "learning_rate": 1.6055659620016056e-08, "loss": 0.2117, "step": 7354 }, { "epoch": 0.984078137543484, "grad_norm": 47.43149948120117, "learning_rate": 1.592186245651592e-08, "loss": 0.2822, "step": 7355 }, { "epoch": 0.9842119347069842, "grad_norm": 40.929359436035156, "learning_rate": 1.5788065293015788e-08, "loss": 0.2664, "step": 7356 }, { "epoch": 0.9843457318704844, "grad_norm": 36.576778411865234, "learning_rate": 1.5654268129515654e-08, "loss": 0.311, "step": 7357 }, { "epoch": 0.9844795290339845, "grad_norm": 28.544445037841797, "learning_rate": 1.552047096601552e-08, "loss": 0.3046, "step": 7358 }, { "epoch": 0.9846133261974847, "grad_norm": 47.01768493652344, "learning_rate": 1.5386673802515386e-08, "loss": 0.2796, "step": 7359 }, { "epoch": 0.9847471233609848, "grad_norm": 69.99488830566406, "learning_rate": 1.5252876639015252e-08, "loss": 0.7356, "step": 7360 }, { "epoch": 0.9848809205244848, "grad_norm": 48.732948303222656, "learning_rate": 1.5119079475515118e-08, "loss": 0.3773, "step": 7361 }, { "epoch": 0.985014717687985, "grad_norm": 32.15966033935547, "learning_rate": 1.4985282312014984e-08, "loss": 0.2908, "step": 7362 }, { "epoch": 0.9851485148514851, "grad_norm": 57.93793869018555, "learning_rate": 1.485148514851485e-08, "loss": 0.2401, "step": 7363 }, { "epoch": 0.9852823120149853, "grad_norm": 50.73208236694336, "learning_rate": 1.4717687985014716e-08, "loss": 0.4059, "step": 7364 }, { "epoch": 0.9854161091784854, "grad_norm": 32.038169860839844, "learning_rate": 1.4583890821514584e-08, "loss": 0.2364, "step": 7365 }, { "epoch": 0.9855499063419856, "grad_norm": 47.19532775878906, "learning_rate": 1.445009365801445e-08, "loss": 0.3763, "step": 7366 }, { "epoch": 0.9856837035054857, "grad_norm": 37.444984436035156, "learning_rate": 1.4316296494514314e-08, "loss": 0.1939, "step": 7367 }, { "epoch": 0.9858175006689858, "grad_norm": 24.196374893188477, "learning_rate": 1.4182499331014182e-08, "loss": 0.1602, "step": 7368 }, { "epoch": 0.9859512978324859, "grad_norm": 25.051870346069336, "learning_rate": 1.4048702167514048e-08, "loss": 0.2407, "step": 7369 }, { "epoch": 0.9860850949959861, "grad_norm": 57.400936126708984, "learning_rate": 1.3914905004013914e-08, "loss": 0.397, "step": 7370 }, { "epoch": 0.9862188921594862, "grad_norm": 29.043533325195312, "learning_rate": 1.378110784051378e-08, "loss": 0.2011, "step": 7371 }, { "epoch": 0.9863526893229864, "grad_norm": 40.36882781982422, "learning_rate": 1.3647310677013646e-08, "loss": 0.2803, "step": 7372 }, { "epoch": 0.9864864864864865, "grad_norm": 36.29819107055664, "learning_rate": 1.3513513513513514e-08, "loss": 0.313, "step": 7373 }, { "epoch": 0.9866202836499867, "grad_norm": 40.208213806152344, "learning_rate": 1.3379716350013378e-08, "loss": 0.2189, "step": 7374 }, { "epoch": 0.9867540808134867, "grad_norm": 42.971527099609375, "learning_rate": 1.3245919186513246e-08, "loss": 0.2665, "step": 7375 }, { "epoch": 0.9868878779769868, "grad_norm": 44.83416748046875, "learning_rate": 1.3112122023013112e-08, "loss": 0.4109, "step": 7376 }, { "epoch": 0.987021675140487, "grad_norm": 50.79045486450195, "learning_rate": 1.2978324859512976e-08, "loss": 0.3424, "step": 7377 }, { "epoch": 0.9871554723039871, "grad_norm": 40.505680084228516, "learning_rate": 1.2844527696012844e-08, "loss": 0.3326, "step": 7378 }, { "epoch": 0.9872892694674873, "grad_norm": 31.690134048461914, "learning_rate": 1.271073053251271e-08, "loss": 0.2444, "step": 7379 }, { "epoch": 0.9874230666309874, "grad_norm": 49.997718811035156, "learning_rate": 1.2576933369012578e-08, "loss": 0.4136, "step": 7380 }, { "epoch": 0.9875568637944876, "grad_norm": 86.46589660644531, "learning_rate": 1.2443136205512442e-08, "loss": 0.5063, "step": 7381 }, { "epoch": 0.9876906609579877, "grad_norm": 50.84786605834961, "learning_rate": 1.2309339042012308e-08, "loss": 0.2941, "step": 7382 }, { "epoch": 0.9878244581214878, "grad_norm": 56.440670013427734, "learning_rate": 1.2175541878512176e-08, "loss": 0.4556, "step": 7383 }, { "epoch": 0.9879582552849879, "grad_norm": 29.248525619506836, "learning_rate": 1.204174471501204e-08, "loss": 0.1869, "step": 7384 }, { "epoch": 0.9880920524484881, "grad_norm": 35.82600402832031, "learning_rate": 1.1907947551511906e-08, "loss": 0.2834, "step": 7385 }, { "epoch": 0.9882258496119882, "grad_norm": 47.741127014160156, "learning_rate": 1.1774150388011774e-08, "loss": 0.3309, "step": 7386 }, { "epoch": 0.9883596467754884, "grad_norm": 43.22308349609375, "learning_rate": 1.164035322451164e-08, "loss": 0.2848, "step": 7387 }, { "epoch": 0.9884934439389885, "grad_norm": 36.46099090576172, "learning_rate": 1.1506556061011506e-08, "loss": 0.234, "step": 7388 }, { "epoch": 0.9886272411024887, "grad_norm": 30.888853073120117, "learning_rate": 1.1372758897511372e-08, "loss": 0.2591, "step": 7389 }, { "epoch": 0.9887610382659887, "grad_norm": 40.25214385986328, "learning_rate": 1.1238961734011238e-08, "loss": 0.3597, "step": 7390 }, { "epoch": 0.9888948354294889, "grad_norm": 53.174346923828125, "learning_rate": 1.1105164570511104e-08, "loss": 0.3558, "step": 7391 }, { "epoch": 0.989028632592989, "grad_norm": 45.59877395629883, "learning_rate": 1.097136740701097e-08, "loss": 0.3191, "step": 7392 }, { "epoch": 0.9891624297564892, "grad_norm": 29.926084518432617, "learning_rate": 1.0837570243510838e-08, "loss": 0.2218, "step": 7393 }, { "epoch": 0.9892962269199893, "grad_norm": 48.67771911621094, "learning_rate": 1.0703773080010704e-08, "loss": 0.4362, "step": 7394 }, { "epoch": 0.9894300240834895, "grad_norm": 29.259708404541016, "learning_rate": 1.0569975916510568e-08, "loss": 0.142, "step": 7395 }, { "epoch": 0.9895638212469896, "grad_norm": 42.117740631103516, "learning_rate": 1.0436178753010436e-08, "loss": 0.4135, "step": 7396 }, { "epoch": 0.9896976184104896, "grad_norm": 35.88846206665039, "learning_rate": 1.0302381589510302e-08, "loss": 0.2669, "step": 7397 }, { "epoch": 0.9898314155739898, "grad_norm": 38.231937408447266, "learning_rate": 1.0168584426010168e-08, "loss": 0.2792, "step": 7398 }, { "epoch": 0.9899652127374899, "grad_norm": 47.823631286621094, "learning_rate": 1.0034787262510034e-08, "loss": 0.3769, "step": 7399 }, { "epoch": 0.9900990099009901, "grad_norm": 42.687744140625, "learning_rate": 9.9009900990099e-09, "loss": 0.2767, "step": 7400 }, { "epoch": 0.9902328070644902, "grad_norm": 59.036312103271484, "learning_rate": 9.767192935509768e-09, "loss": 0.3682, "step": 7401 }, { "epoch": 0.9903666042279904, "grad_norm": 54.60615921020508, "learning_rate": 9.633395772009632e-09, "loss": 0.53, "step": 7402 }, { "epoch": 0.9905004013914905, "grad_norm": 52.51370620727539, "learning_rate": 9.499598608509498e-09, "loss": 0.2556, "step": 7403 }, { "epoch": 0.9906341985549907, "grad_norm": 38.97259521484375, "learning_rate": 9.365801445009366e-09, "loss": 0.342, "step": 7404 }, { "epoch": 0.9907679957184907, "grad_norm": 56.8001708984375, "learning_rate": 9.232004281509232e-09, "loss": 0.4444, "step": 7405 }, { "epoch": 0.9909017928819909, "grad_norm": 37.354557037353516, "learning_rate": 9.098207118009098e-09, "loss": 0.2822, "step": 7406 }, { "epoch": 0.991035590045491, "grad_norm": 53.29143142700195, "learning_rate": 8.964409954508964e-09, "loss": 0.5196, "step": 7407 }, { "epoch": 0.9911693872089912, "grad_norm": 50.26121520996094, "learning_rate": 8.83061279100883e-09, "loss": 0.4202, "step": 7408 }, { "epoch": 0.9913031843724913, "grad_norm": 28.711589813232422, "learning_rate": 8.696815627508696e-09, "loss": 0.2702, "step": 7409 }, { "epoch": 0.9914369815359915, "grad_norm": 32.80230712890625, "learning_rate": 8.563018464008562e-09, "loss": 0.1645, "step": 7410 }, { "epoch": 0.9915707786994916, "grad_norm": 46.54493713378906, "learning_rate": 8.42922130050843e-09, "loss": 0.1907, "step": 7411 }, { "epoch": 0.9917045758629917, "grad_norm": 64.02152252197266, "learning_rate": 8.295424137008296e-09, "loss": 0.5899, "step": 7412 }, { "epoch": 0.9918383730264918, "grad_norm": 42.91229248046875, "learning_rate": 8.16162697350816e-09, "loss": 0.3263, "step": 7413 }, { "epoch": 0.991972170189992, "grad_norm": 36.952186584472656, "learning_rate": 8.027829810008028e-09, "loss": 0.2533, "step": 7414 }, { "epoch": 0.9921059673534921, "grad_norm": 42.519100189208984, "learning_rate": 7.894032646507894e-09, "loss": 0.3811, "step": 7415 }, { "epoch": 0.9922397645169923, "grad_norm": 70.8553237915039, "learning_rate": 7.76023548300776e-09, "loss": 0.6838, "step": 7416 }, { "epoch": 0.9923735616804924, "grad_norm": 53.46004104614258, "learning_rate": 7.626438319507626e-09, "loss": 0.399, "step": 7417 }, { "epoch": 0.9925073588439925, "grad_norm": 39.71845245361328, "learning_rate": 7.492641156007492e-09, "loss": 0.1604, "step": 7418 }, { "epoch": 0.9926411560074926, "grad_norm": 56.28370666503906, "learning_rate": 7.358843992507358e-09, "loss": 0.4204, "step": 7419 }, { "epoch": 0.9927749531709927, "grad_norm": 34.37940216064453, "learning_rate": 7.225046829007225e-09, "loss": 0.249, "step": 7420 }, { "epoch": 0.9929087503344929, "grad_norm": 62.55629348754883, "learning_rate": 7.091249665507091e-09, "loss": 0.6126, "step": 7421 }, { "epoch": 0.993042547497993, "grad_norm": 61.082149505615234, "learning_rate": 6.957452502006957e-09, "loss": 0.5441, "step": 7422 }, { "epoch": 0.9931763446614932, "grad_norm": 60.122859954833984, "learning_rate": 6.823655338506823e-09, "loss": 0.8385, "step": 7423 }, { "epoch": 0.9933101418249933, "grad_norm": 53.18011474609375, "learning_rate": 6.689858175006689e-09, "loss": 0.468, "step": 7424 }, { "epoch": 0.9934439389884935, "grad_norm": 27.294612884521484, "learning_rate": 6.556061011506556e-09, "loss": 0.2125, "step": 7425 }, { "epoch": 0.9935777361519936, "grad_norm": 56.26420974731445, "learning_rate": 6.422263848006422e-09, "loss": 0.4859, "step": 7426 }, { "epoch": 0.9937115333154937, "grad_norm": 34.36020278930664, "learning_rate": 6.288466684506289e-09, "loss": 0.2067, "step": 7427 }, { "epoch": 0.9938453304789938, "grad_norm": 29.505624771118164, "learning_rate": 6.154669521006154e-09, "loss": 0.1176, "step": 7428 }, { "epoch": 0.993979127642494, "grad_norm": 32.424049377441406, "learning_rate": 6.02087235750602e-09, "loss": 0.2956, "step": 7429 }, { "epoch": 0.9941129248059941, "grad_norm": 53.86669921875, "learning_rate": 5.887075194005887e-09, "loss": 0.3669, "step": 7430 }, { "epoch": 0.9942467219694943, "grad_norm": 26.348121643066406, "learning_rate": 5.753278030505753e-09, "loss": 0.1766, "step": 7431 }, { "epoch": 0.9943805191329944, "grad_norm": 49.4372444152832, "learning_rate": 5.619480867005619e-09, "loss": 0.4637, "step": 7432 }, { "epoch": 0.9945143162964946, "grad_norm": 66.36156463623047, "learning_rate": 5.485683703505485e-09, "loss": 0.4856, "step": 7433 }, { "epoch": 0.9946481134599946, "grad_norm": 29.544208526611328, "learning_rate": 5.351886540005352e-09, "loss": 0.2272, "step": 7434 }, { "epoch": 0.9947819106234947, "grad_norm": 33.34505081176758, "learning_rate": 5.218089376505218e-09, "loss": 0.2893, "step": 7435 }, { "epoch": 0.9949157077869949, "grad_norm": 62.02311325073242, "learning_rate": 5.084292213005084e-09, "loss": 0.231, "step": 7436 }, { "epoch": 0.995049504950495, "grad_norm": 40.44163513183594, "learning_rate": 4.95049504950495e-09, "loss": 0.3692, "step": 7437 }, { "epoch": 0.9951833021139952, "grad_norm": 45.52751922607422, "learning_rate": 4.816697886004816e-09, "loss": 0.3756, "step": 7438 }, { "epoch": 0.9953170992774953, "grad_norm": 29.10796546936035, "learning_rate": 4.682900722504683e-09, "loss": 0.2344, "step": 7439 }, { "epoch": 0.9954508964409955, "grad_norm": 25.711074829101562, "learning_rate": 4.549103559004549e-09, "loss": 0.1767, "step": 7440 }, { "epoch": 0.9955846936044955, "grad_norm": 38.57344055175781, "learning_rate": 4.415306395504415e-09, "loss": 0.413, "step": 7441 }, { "epoch": 0.9957184907679957, "grad_norm": 37.96451950073242, "learning_rate": 4.281509232004281e-09, "loss": 0.2591, "step": 7442 }, { "epoch": 0.9958522879314958, "grad_norm": 51.25944900512695, "learning_rate": 4.147712068504148e-09, "loss": 0.3078, "step": 7443 }, { "epoch": 0.995986085094996, "grad_norm": 59.25737380981445, "learning_rate": 4.013914905004014e-09, "loss": 0.3191, "step": 7444 }, { "epoch": 0.9961198822584961, "grad_norm": 41.720577239990234, "learning_rate": 3.88011774150388e-09, "loss": 0.3101, "step": 7445 }, { "epoch": 0.9962536794219963, "grad_norm": 41.49711227416992, "learning_rate": 3.746320578003746e-09, "loss": 0.389, "step": 7446 }, { "epoch": 0.9963874765854964, "grad_norm": 35.57976150512695, "learning_rate": 3.6125234145036125e-09, "loss": 0.1842, "step": 7447 }, { "epoch": 0.9965212737489966, "grad_norm": 53.53559494018555, "learning_rate": 3.4787262510034785e-09, "loss": 0.291, "step": 7448 }, { "epoch": 0.9966550709124966, "grad_norm": 29.136011123657227, "learning_rate": 3.3449290875033446e-09, "loss": 0.1829, "step": 7449 }, { "epoch": 0.9967888680759968, "grad_norm": 55.82667922973633, "learning_rate": 3.211131924003211e-09, "loss": 0.5442, "step": 7450 }, { "epoch": 0.9969226652394969, "grad_norm": 39.31253433227539, "learning_rate": 3.077334760503077e-09, "loss": 0.292, "step": 7451 }, { "epoch": 0.9970564624029971, "grad_norm": 49.504920959472656, "learning_rate": 2.9435375970029435e-09, "loss": 0.4284, "step": 7452 }, { "epoch": 0.9971902595664972, "grad_norm": 42.16488265991211, "learning_rate": 2.8097404335028095e-09, "loss": 0.3976, "step": 7453 }, { "epoch": 0.9973240567299974, "grad_norm": 48.79595184326172, "learning_rate": 2.675943270002676e-09, "loss": 0.474, "step": 7454 }, { "epoch": 0.9974578538934975, "grad_norm": 42.57289123535156, "learning_rate": 2.542146106502542e-09, "loss": 0.4383, "step": 7455 }, { "epoch": 0.9975916510569975, "grad_norm": 57.079246520996094, "learning_rate": 2.408348943002408e-09, "loss": 0.3427, "step": 7456 }, { "epoch": 0.9977254482204977, "grad_norm": 37.128238677978516, "learning_rate": 2.2745517795022745e-09, "loss": 0.1862, "step": 7457 }, { "epoch": 0.9978592453839978, "grad_norm": 41.84012985229492, "learning_rate": 2.1407546160021405e-09, "loss": 0.3122, "step": 7458 }, { "epoch": 0.997993042547498, "grad_norm": 35.29287338256836, "learning_rate": 2.006957452502007e-09, "loss": 0.2883, "step": 7459 }, { "epoch": 0.9981268397109981, "grad_norm": 45.35420608520508, "learning_rate": 1.873160289001873e-09, "loss": 0.2774, "step": 7460 }, { "epoch": 0.9982606368744983, "grad_norm": 48.7961311340332, "learning_rate": 1.7393631255017393e-09, "loss": 0.2745, "step": 7461 }, { "epoch": 0.9983944340379984, "grad_norm": 46.16399383544922, "learning_rate": 1.6055659620016055e-09, "loss": 0.2633, "step": 7462 }, { "epoch": 0.9985282312014985, "grad_norm": 27.275243759155273, "learning_rate": 1.4717687985014717e-09, "loss": 0.3401, "step": 7463 }, { "epoch": 0.9986620283649986, "grad_norm": 46.21627426147461, "learning_rate": 1.337971635001338e-09, "loss": 0.4427, "step": 7464 }, { "epoch": 0.9987958255284988, "grad_norm": 34.45832824707031, "learning_rate": 1.204174471501204e-09, "loss": 0.2451, "step": 7465 }, { "epoch": 0.9989296226919989, "grad_norm": 42.79619598388672, "learning_rate": 1.0703773080010703e-09, "loss": 0.2445, "step": 7466 }, { "epoch": 0.9990634198554991, "grad_norm": 31.18369483947754, "learning_rate": 9.365801445009365e-10, "loss": 0.2225, "step": 7467 }, { "epoch": 0.9991972170189992, "grad_norm": 45.32524490356445, "learning_rate": 8.027829810008028e-10, "loss": 0.45, "step": 7468 }, { "epoch": 0.9993310141824994, "grad_norm": 32.951637268066406, "learning_rate": 6.68985817500669e-10, "loss": 0.1692, "step": 7469 }, { "epoch": 0.9994648113459995, "grad_norm": 27.284467697143555, "learning_rate": 5.351886540005351e-10, "loss": 0.3075, "step": 7470 }, { "epoch": 0.9995986085094996, "grad_norm": 46.003753662109375, "learning_rate": 4.013914905004014e-10, "loss": 0.2733, "step": 7471 }, { "epoch": 0.9997324056729997, "grad_norm": 30.985803604125977, "learning_rate": 2.6759432700026757e-10, "loss": 0.2529, "step": 7472 }, { "epoch": 0.9998662028364999, "grad_norm": 49.59328842163086, "learning_rate": 1.3379716350013378e-10, "loss": 0.4915, "step": 7473 }, { "epoch": 1.0, "grad_norm": 48.988380432128906, "learning_rate": 0.0, "loss": 0.3809, "step": 7474 } ], "logging_steps": 1.0, "max_steps": 7474, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1631197020844196e+18, "train_batch_size": 3, "trial_name": null, "trial_params": null }